Sequential vs Parallel Computing for Matrix Multiplication Using OpenMP
/*
How Parallel Computing Works for Matrix Multiplication Using OpenMP

Problem:
Given two matrices A (N × N) and B (N × N), compute the matrix C = A × B.

Each element:
    C[i][j] = Σ (A[i][k] × B[k][j])  for k = 0 to N-1

---------------------------------------------------
PARALLELIZATION OPPORTUNITY
---------------------------------------------------
- Each element C[i][j] is independent.
- There is no data dependency between different (i, j) pairs.
- This makes matrix multiplication EMBARRASSINGLY PARALLEL.

---------------------------------------------------
OPENMP STRATEGY
---------------------------------------------------
1. The outer loops (i, j) are parallelized.
2. The inner loop (k) remains sequential.
3. Threads compute different rows or blocks of C.
4. No synchronization is needed during computation (see the sketch below).
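
A minimal sketch of the directive used in the code below: collapse(2)
merges the i and j loops into a single N*N iteration space that OpenMP
divides among the threads, and variables declared inside the loop body
(such as the running sum) are automatically private to each thread.

    #pragma omp parallel for collapse(2)
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++)
            ... // each thread writes a disjoint set of C[i][j]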

This program compares:
- Sequential matrix multiplication
- Parallel matrix multiplication using OpenMP

Compile:
>>> g++ -fopenmp -O2 -o parallel_matmul parallel_matmul.cpp

Run:
>>> ./parallel_matmul
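
The thread count can also be pinned for more reproducible timings via
the standard OMP_NUM_THREADS environment variable (8 below is only an
illustrative value; use your physical core count):

>>> OMP_NUM_THREADS=8 ./parallel_matmul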
*/

#include <iostream>
#include <vector>
#include <cstdlib>
#include <omp.h>
using namespace std;

int main() {
    const int N = 800;  // Matrix dimension (adjust for your system)

    vector<vector<int>> A(N, vector<int>(N));
    vector<vector<int>> B(N, vector<int>(N));
    vector<vector<int>> C_seq(N, vector<int>(N, 0));
    vector<vector<int>> C_par(N, vector<int>(N, 0));

    // ---------------------------
    // Initialize Matrices
    // ---------------------------
    srand(0);
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i][j] = rand() % 10;
            B[i][j] = rand() % 10;
        }
    }

    // ---------------------------
    // 1. Sequential Multiplication
    // ---------------------------
    double start_seq = omp_get_wtime();

    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            int sum = 0;
            for (int k = 0; k < N; k++) {
                sum += A[i][k] * B[k][j];
            }
            C_seq[i][j] = sum;
        }
    }

    double end_seq = omp_get_wtime();
    double seq_time = end_seq - start_seq;

    // ---------------------------
    // 2. Parallel Multiplication
    // ---------------------------
    double start_par = omp_get_wtime();
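    // collapse(2) distributes the combined N*N (i, j) iteration space
    // across the threads; each iteration writes a distinct C_par[i][j],
    // so no locks or atomics are needed.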
    #pragma omp parallel for collapse(2)
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            int sum = 0;
            for (int k = 0; k < N; k++) {
                sum += A[i][k] * B[k][j];
            }
            C_par[i][j] = sum;
        }
    }

    double end_par = omp_get_wtime();
    double par_time = end_par - start_par;

    // ---------------------------
    // Verification
    // ---------------------------
    bool correct = true;
    for (int i = 0; i < N && correct; i++) {
        for (int j = 0; j < N; j++) {
            if (C_seq[i][j] != C_par[i][j]) {
                correct = false;
                break;
            }
        }
    }

    // ---------------------------
    // Print Results
    // ---------------------------
    cout << "Matrix Multiplication (N = " << N << ")\n\n";
    cout << "Sequential Time: " << seq_time << " sec\n";
    cout << "Parallel Time: " << par_time << " sec\n";
    cout << "\nSpeedup = " << (seq_time / par_time) << "x\n";

    if (correct) {
        cout << "\nMatrix multiplication is correct.\n";
    } else {
        cout << "\nMismatch detected in results!\n";
    }

    return 0;
}

/*
Output Example:

Matrix Multiplication (N = 800)

Sequential Time: 0.837 sec
Parallel Time: 0.343 sec

Speedup = 2.44023x

Matrix multiplication is correct.

Note: Performance depends on cores, cache, and memory bandwidth.
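
A sketch of one cache-oriented variant (not benchmarked here): reordering
the loops to i-k-j makes the innermost loop walk B and C row by row, so
memory accesses stay contiguous. C_par must start zeroed for this form
(it does above), and parallelizing over i keeps each row owned by one thread.

    #pragma omp parallel for
    for (int i = 0; i < N; i++)
        for (int k = 0; k < N; k++) {
            int a = A[i][k];
            for (int j = 0; j < N; j++)
                C_par[i][j] += a * B[k][j];  // row-wise access to B and C
        }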
*/