@iamtgiri
Created December 11, 2025 09:14
Parallel vs Sequential Array Sum Using OpenMP in C++
/*
How Parallel Computing Works to Calculate the Sum of an Array Using OpenMP
OpenMP internally does 5 things to achieve parallelism in the sum calculation:
1. Creates a team of threads (fork)
If the machine has 8 cores, OpenMP might create 8 threads:
T0, T1, T2, T3, T4, T5, T6, T7
This is the "fork" part of the fork-join model.
2. Divides the loop iterations among threads
Each thread gets a portion of the array to work on.
For example:
- T0 processes arr[0] to arr[12499999]
- T1 processes arr[12500000] to arr[24999999]
- and so on...
This division is called work-sharing.
3. Each thread performs its own partial sum (private accumulation)
Each thread gets a local copy of par_sum.
Let us call them: par_sum_T0, par_sum_T1, par_sum_T2...
Each thread sums its chunk:
T0 → adds arr[0...12499999]
T1 → adds arr[12500000...24999999]
...
4. At the end, OpenMP merges (reduces) all partial sums into the final sum
OpenMP takes all the par_sum_Ti values and adds them together:
par_sum = par_sum_T0 + par_sum_T1 + par_sum_T2 + ...
This is the "join" step, and it happens in a safe, synchronized way.
5. Destroys threads → returns to single-thread execution
After merging the results, OpenMP shuts down the thread team.
The rest of the code runs normally on a single main thread.
Below is a complete C++ program that demonstrates this process: it computes the sum of a
large array both sequentially and in parallel with OpenMP, measures the time taken by each
approach, and reports the speedup achieved through parallelization.
To compile this code with OpenMP support, use the following commands (filename: parallel_sum.cpp):
>>> g++ -fopenmp -o parallel_sum parallel_sum.cpp
>>> ./parallel_sum
*/
#include <iostream>
#include <vector>
#include <cstdlib> // rand()
#include <omp.h>
using namespace std;
int main() {
    // Size of array: increase if you want more dramatic results
    const long long N = 100000000; // 100 million elements
    vector<int> arr(N);

    // Fill array with random numbers
    for (long long i = 0; i < N; i++) {
        arr[i] = rand() % 10;
    }

    // ---------------------------
    // 1. Sequential Sum
    // ---------------------------
    long long seq_sum = 0;
    double start_seq = omp_get_wtime();
    for (long long i = 0; i < N; i++) {
        seq_sum += arr[i];
    }
    double end_seq = omp_get_wtime();
    double seq_time = end_seq - start_seq;

    // ---------------------------
    // 2. Parallel Sum (OpenMP)
    // ---------------------------
    long long par_sum = 0;
    double start_par = omp_get_wtime();

    // Parallel for automatically distributes iterations;
    // reduction(+:par_sum) gives each thread a private copy
    // of par_sum and merges them safely at the end of the loop.
    #pragma omp parallel for reduction(+:par_sum)
    for (long long i = 0; i < N; i++) {
        par_sum += arr[i];
    }
    double end_par = omp_get_wtime();
    double par_time = end_par - start_par;

    // ---------------------------
    // Print Results
    // ---------------------------
    cout << "For array of size N = " << N << ":\n\n";
    cout << "Sequential Sum: " << seq_sum
         << " | Time: " << seq_time << " sec\n";
    cout << "Parallel Sum: " << par_sum
         << " | Time: " << par_time << " sec\n";
    cout << "\nSpeedup = Sequential_Time / Parallel_Time = "
         << (seq_time / par_time) << "x\n";

    if (seq_sum == par_sum) {
        cout << "\nBoth sums match. Parallel execution is correct.\n";
    } else {
        cout << "\nSums do NOT match! Something is wrong.\n";
    }
    return 0;
}
/*
Example Output:

For array of size N = 100000000:

Sequential Sum: 449950201 | Time: 0.423 sec
Parallel Sum: 449950201 | Time: 0.219 sec

Speedup = Sequential_Time / Parallel_Time = 1.93151x

Both sums match. Parallel execution is correct.

Note: output will vary based on system specifications and load.
*/