Skip to content

Instantly share code, notes, and snippets.

@iamtgiri
Created January 4, 2026 06:36
Show Gist options
  • Select an option

  • Save iamtgiri/4bde7187c951081c7d429f461f4df825 to your computer and use it in GitHub Desktop.

Select an option

Save iamtgiri/4bde7187c951081c7d429f461f4df825 to your computer and use it in GitHub Desktop.
Sequential vs Parallel Computing for Matrix Multiplication Using OpenMP
/*
How Parallel Computing Works for Matrix Multiplication Using OpenMP
Problem:
Given two matrices:
A (N × N) and B (N × N),
compute matrix C = A × B.
Each element:
C[i][j] = Σ (A[i][k] × B[k][j]) for k = 0 to N-1
---------------------------------------------------
PARALLELIZATION OPPORTUNITY
---------------------------------------------------
- Each element C[i][j] is independent
- No data dependency between different (i, j) pairs
- This makes matrix multiplication EMBARRASSINGLY PARALLEL
---------------------------------------------------
OPENMP STRATEGY
---------------------------------------------------
1. Outer loops (i, j) are parallelized
2. Inner loop (k) remains sequential
3. Threads compute different rows or blocks of C
4. No synchronization needed during computation
This program compares:
- Sequential matrix multiplication
- Parallel matrix multiplication using OpenMP
Compile:
>>> g++ -fopenmp -O2 -o parallel_matmul parallel_matmul.cpp
Run:
>>> ./parallel_matmul
*/
#include <bits/stdc++.h>
#include <omp.h>
using namespace std;
int main() {
const int N = 800; // Matrix dimension (adjust for system)
vector<vector<int>> A(N, vector<int>(N));
vector<vector<int>> B(N, vector<int>(N));
vector<vector<int>> C_seq(N, vector<int>(N, 0));
vector<vector<int>> C_par(N, vector<int>(N, 0));
// ---------------------------
// Initialize Matrices
// ---------------------------
srand(0);
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
A[i][j] = rand() % 10;
B[i][j] = rand() % 10;
}
}
// ---------------------------
// 1. Sequential Multiplication
// ---------------------------
double start_seq = omp_get_wtime();
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
int sum = 0;
for (int k = 0; k < N; k++) {
sum += A[i][k] * B[k][j];
}
C_seq[i][j] = sum;
}
}
double end_seq = omp_get_wtime();
double seq_time = end_seq - start_seq;
// ---------------------------
// 2. Parallel Multiplication
// ---------------------------
double start_par = omp_get_wtime();
#pragma omp parallel for collapse(2)
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
int sum = 0;
for (int k = 0; k < N; k++) {
sum += A[i][k] * B[k][j];
}
C_par[i][j] = sum;
}
}
double end_par = omp_get_wtime();
double par_time = end_par - start_par;
// ---------------------------
// Verification
// ---------------------------
bool correct = true;
for (int i = 0; i < N && correct; i++) {
for (int j = 0; j < N; j++) {
if (C_seq[i][j] != C_par[i][j]) {
correct = false;
break;
}
}
}
// ---------------------------
// Print Results
// ---------------------------
cout << "Matrix Multiplication (N = " << N << ")\n\n";
cout << "Sequential Time: " << seq_time << " sec\n";
cout << "Parallel Time: " << par_time << " sec\n";
cout << "\nSpeedup = " << (seq_time / par_time) << "x\n";
if (correct) {
cout << "\nMatrix multiplication is correct.\n";
} else {
cout << "\nMismatch detected in results!\n";
}
return 0;
}
/*
Output Example:
Matrix Multiplication (N = 800)
Sequential Time: 0.837 sec
Parallel Time: 0.343 sec
Speedup = 2.44023x
Matrix multiplication is correct.
Note: performance depends on core count, cache behavior, and memory bandwidth.
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment