Skip to content

Instantly share code, notes, and snippets.

@iamtgiri
Created January 4, 2026 06:37
Show Gist options
  • Select an option

  • Save iamtgiri/5b420b5aaf475866d2fe28567e83ddf4 to your computer and use it in GitHub Desktop.

Select an option

Save iamtgiri/5b420b5aaf475866d2fe28567e83ddf4 to your computer and use it in GitHub Desktop.
Sequential vs Parallel Computing for Merge Sort Using OpenMP
/*
How Parallel Computing Works for Merge Sort Using OpenMP
Merge Sort follows the DIVIDE-AND-CONQUER paradigm:
1. Divide the array into two halves
2. Recursively sort the left half
3. Recursively sort the right half
4. Merge the two sorted halves
---------------------------------------------------
PARALLELIZATION OPPORTUNITY
---------------------------------------------------
- The two recursive calls (left and right halves)
are INDEPENDENT and can run in parallel.
- The merge step is sequential.
---------------------------------------------------
OPENMP STRATEGY (TASK PARALLELISM)
---------------------------------------------------
1. Use OpenMP TASKS to parallelize recursive calls
2. Limit task creation depth to avoid overhead
3. Use a cutoff threshold for small subarrays
4. Synchronize tasks before merging
This program compares:
- Sequential Merge Sort
- Parallel Merge Sort using OpenMP tasks
Compile:
>>> g++ -fopenmp -O2 -o parallel_merge_sort parallel_merge_sort.cpp
Run:
>>> ./parallel_merge_sort
*/
#include <bits/stdc++.h>
#include <omp.h>
using namespace std;
const int THRESHOLD = 10000; // Cutoff for parallel recursion
// ---------------------------
// Merge Function
// ---------------------------
void merge(vector<int>& arr, int l, int m, int r) {
int n1 = m - l + 1;
int n2 = r - m;
vector<int> L(n1), R(n2);
for (int i = 0; i < n1; i++)
L[i] = arr[l + i];
for (int j = 0; j < n2; j++)
R[j] = arr[m + 1 + j];
int i = 0, j = 0, k = l;
while (i < n1 && j < n2) {
if (L[i] <= R[j])
arr[k++] = L[i++];
else
arr[k++] = R[j++];
}
while (i < n1)
arr[k++] = L[i++];
while (j < n2)
arr[k++] = R[j++];
}
// ---------------------------
// Sequential Merge Sort
// ---------------------------
void seq_merge_sort(vector<int>& arr, int l, int r) {
if (l >= r) return;
int m = l + (r - l) / 2;
seq_merge_sort(arr, l, m);
seq_merge_sort(arr, m + 1, r);
merge(arr, l, m, r);
}
// ---------------------------
// Parallel Merge Sort (OpenMP)
// ---------------------------
void par_merge_sort(vector<int>& arr, int l, int r) {
if (l >= r) return;
// Switch to sequential for small ranges
if (r - l < THRESHOLD) {
seq_merge_sort(arr, l, r);
return;
}
int m = l + (r - l) / 2;
#pragma omp task shared(arr)
par_merge_sort(arr, l, m);
#pragma omp task shared(arr)
par_merge_sort(arr, m + 1, r);
#pragma omp taskwait
merge(arr, l, m, r);
}
int main() {
const int N = 10000000; // 10 million elements
vector<int> arr(N), seq_arr, par_arr;
srand(0);
for (int i = 0; i < N; i++) {
arr[i] = rand();
}
seq_arr = arr;
par_arr = arr;
// ---------------------------
// Sequential Merge Sort
// ---------------------------
double start_seq = omp_get_wtime();
seq_merge_sort(seq_arr, 0, N - 1);
double end_seq = omp_get_wtime();
double seq_time = end_seq - start_seq;
// ---------------------------
// Parallel Merge Sort
// ---------------------------
double start_par = omp_get_wtime();
#pragma omp parallel
{
#pragma omp single
par_merge_sort(par_arr, 0, N - 1);
}
double end_par = omp_get_wtime();
double par_time = end_par - start_par;
// ---------------------------
// Verification
// ---------------------------
bool correct = true;
for (int i = 0; i < N; i++) {
if (seq_arr[i] != par_arr[i]) {
correct = false;
break;
}
}
// ---------------------------
// Print Results
// ---------------------------
cout << "Merge Sort for N = " << N << "\n\n";
cout << "Sequential Time: " << seq_time << " sec\n";
cout << "Parallel Time: " << par_time << " sec\n";
cout << "\nSpeedup = " << (seq_time / par_time) << "x\n";
if (correct) {
cout << "\nArrays are sorted correctly.\n";
} else {
cout << "\nSorting error detected!\n";
}
return 0;
}
/*
Output Example:
Merge Sort for N = 10000000
Sequential Time: 2.444 sec
Parallel Time: 1.088 sec
Speedup = 2.24632x
Arrays are sorted correctly.
## Performance depends on core count and task granularity.
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment