Sequential vs Parallel Computing for Matrix Multiplication Using OpenMP
/*
How Parallel Computing Works for Matrix Multiplication Using OpenMP

Problem:
Given two matrices A (N × N) and B (N × N), compute the matrix C = A × B.

Each element:
    C[i][j] = Σ (A[i][k] × B[k][j])  for k = 0 to N-1

---------------------------------------------------
PARALLELIZATION OPPORTUNITY
---------------------------------------------------
- Each element C[i][j] is independent.
- There is no data dependency between different (i, j) pairs.
- This makes matrix multiplication EMBARRASSINGLY PARALLEL.

---------------------------------------------------
OPENMP STRATEGY
---------------------------------------------------
1. The outer loops (i, j) are parallelized.
2. The inner loop (k) remains sequential.
3. Threads compute different rows or blocks of C.
4. No synchronization is needed during computation (see the sketch below).
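
A minimal sketch of the directive used in the code below: collapse(2)
merges the i and j loops into a single N*N iteration space that OpenMP
divides among the threads, and variables declared inside the loop body
(such as the running sum) are automatically private to each thread.

    #pragma omp parallel for collapse(2)
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++)
            ... // each thread writes a disjoint set of C[i][j]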

This program compares:
- Sequential matrix multiplication
- Parallel matrix multiplication using OpenMP

Compile:
>>> g++ -fopenmp -O2 -o parallel_matmul parallel_matmul.cpp

Run:
>>> ./parallel_matmul
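
The thread count can also be pinned for more reproducible timings via
the standard OMP_NUM_THREADS environment variable (8 below is only an
illustrative value; use your physical core count):

>>> OMP_NUM_THREADS=8 ./parallel_matmul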
*/

#include <iostream>
#include <vector>
#include <cstdlib>
#include <omp.h>
using namespace std;

int main() {
    const int N = 800;  // Matrix dimension (adjust for your system)

    vector<vector<int>> A(N, vector<int>(N));
    vector<vector<int>> B(N, vector<int>(N));
    vector<vector<int>> C_seq(N, vector<int>(N, 0));
    vector<vector<int>> C_par(N, vector<int>(N, 0));

    // ---------------------------
    // Initialize Matrices
    // ---------------------------
    srand(0);
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i][j] = rand() % 10;
            B[i][j] = rand() % 10;
        }
    }

    // ---------------------------
    // 1. Sequential Multiplication
    // ---------------------------
    double start_seq = omp_get_wtime();

    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            int sum = 0;
            for (int k = 0; k < N; k++) {
                sum += A[i][k] * B[k][j];
            }
            C_seq[i][j] = sum;
        }
    }

    double end_seq = omp_get_wtime();
    double seq_time = end_seq - start_seq;

    // ---------------------------
    // 2. Parallel Multiplication
    // ---------------------------
    double start_par = omp_get_wtime();
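    // collapse(2) distributes the combined N*N (i, j) iteration space
    // across the threads; each iteration writes a distinct C_par[i][j],
    // so no locks or atomics are needed.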
    #pragma omp parallel for collapse(2)
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            int sum = 0;
            for (int k = 0; k < N; k++) {
                sum += A[i][k] * B[k][j];
            }
            C_par[i][j] = sum;
        }
    }

    double end_par = omp_get_wtime();
    double par_time = end_par - start_par;

    // ---------------------------
    // Verification
    // ---------------------------
    bool correct = true;
    for (int i = 0; i < N && correct; i++) {
        for (int j = 0; j < N; j++) {
            if (C_seq[i][j] != C_par[i][j]) {
                correct = false;
                break;
            }
        }
    }

    // ---------------------------
    // Print Results
    // ---------------------------
    cout << "Matrix Multiplication (N = " << N << ")\n\n";
    cout << "Sequential Time: " << seq_time << " sec\n";
    cout << "Parallel Time: " << par_time << " sec\n";
    cout << "\nSpeedup = " << (seq_time / par_time) << "x\n";

    if (correct) {
        cout << "\nMatrix multiplication is correct.\n";
    } else {
        cout << "\nMismatch detected in results!\n";
    }

    return 0;
}

/*
Output Example:

Matrix Multiplication (N = 800)

Sequential Time: 0.837 sec
Parallel Time: 0.343 sec

Speedup = 2.44023x

Matrix multiplication is correct.

Note: Performance depends on cores, cache, and memory bandwidth.
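
A sketch of one cache-oriented variant (not benchmarked here): reordering
the loops to i-k-j makes the innermost loop walk B and C row by row, so
memory accesses stay contiguous. C_par must start zeroed for this form
(it does above), and parallelizing over i keeps each row owned by one thread.

    #pragma omp parallel for
    for (int i = 0; i < N; i++)
        for (int k = 0; k < N; k++) {
            int a = A[i][k];
            for (int j = 0; j < N; j++)
                C_par[i][j] += a * B[k][j];  // row-wise access to B and C
        }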
*/