Benchmark Rails parallel test workers to find the optimal core count for your machine
#!/usr/bin/env bash
#
# bin/parallel_bench
#
# Benchmarks your Rails parallel test suite across 1..N workers (N = core count).
# Runs each worker count 3 times in randomized order, reports median.
#
# Uses PARALLEL_WORKERS env var (Rails 6+ built-in) to control parallelism.
# Fixed seed ensures identical test ordering across all runs.
# Randomized execution order eliminates cache warmth bias.
#
# Prerequisites:
#   - Test databases already created and schema-loaded
#   - Tests passing at full parallelism
#
# Usage:
#   bin/parallel_bench        # full run: 1..nproc, 3 samples each
#   bin/parallel_bench 4 16   # custom range: 4..16
#   bin/parallel_bench 8 8    # single worker count, 3 samples
#
# macOS: requires `brew install coreutils` for gshuf, or replace:
#   nproc       → sysctl -n hw.ncpu
#   free -h     → sysctl -n hw.memsize | awk '{printf "%.0fGi", $1/1073741824}'
#   shuf        → gshuf (from coreutils)
#   date +%s%N  → ruby -e 'puts (Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1e9).to_i'
#
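# Portability sketch (commented out; an assumption, not part of the original
# workflow): helpers like these let the same body run on Linux and macOS.
# Note that `declare -A` below needs bash 4+; macOS ships bash 3.2 at /bin/bash,
# so invoke this script with a newer bash (e.g. Homebrew's) there.
#
#   core_count() { command -v nproc >/dev/null 2>&1 && nproc || sysctl -n hw.ncpu; }
#   shuffle()    { command -v shuf >/dev/null 2>&1 && shuf || gshuf; }
#   now_ns()     { ruby -e "puts (Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1e9).to_i"; }
#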
set -euo pipefail
MAX_CORES=$(nproc)
MIN_WORKERS=${1:-1}
MAX_WORKERS=${2:-$MAX_CORES}
SAMPLES=3
SEED=$((RANDOM % 99999))
RESULTS_DIR="tmp/parallel_bench"
RESULTS_FILE="$RESULTS_DIR/results_$(date +%Y%m%d_%H%M%S).csv"
mkdir -p "$RESULTS_DIR"
# ── System context ──────────────────────────────────────────────────
echo "═══════════════════════════════════════════════════════════════"
echo " Parallel Worker Benchmark"
echo "═══════════════════════════════════════════════════════════════"
echo ""
echo "System:"
echo " Cores: $MAX_CORES"
echo " RAM: $(free -h | awk '/^Mem:/{print $2}')"
echo " Ruby: $(ruby -v)"
echo " Rails: $(RAILS_ENV=test bin/rails runner 'puts Rails.version' 2>/dev/null)"
echo " MySQL: $(mysql --version 2>/dev/null || echo 'n/a')"
echo " PG: $(psql --version 2>/dev/null || echo 'n/a')"
echo ""
echo "Config:"
echo " Workers: $MIN_WORKERS..$MAX_WORKERS"
echo " Samples: $SAMPLES per worker count"
echo " Seed: $SEED"
echo " Results: $RESULTS_FILE"
echo ""
# ── Build randomized run plan ───────────────────────────────────────
PLAN=()
for workers in $(seq "$MIN_WORKERS" "$MAX_WORKERS"); do
  for sample in $(seq 1 $SAMPLES); do
    PLAN+=("$workers")
  done
done
# Shuffle the plan
SHUFFLED=($(printf '%s\n' "${PLAN[@]}" | shuf))
TOTAL_RUNS=${#SHUFFLED[@]}
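# The estimate below assumes each full suite run takes roughly 20-30 seconds;
# scale it for your own suite's runtime.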
| echo "Total runs: $TOTAL_RUNS (estimated $(( TOTAL_RUNS / 3 )) - $(( TOTAL_RUNS / 2 )) minutes)" | |
| echo "" | |
| # ── Warmup ────────────────────────────────────────────────────────── | |
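# One full-parallelism pass primes caches (e.g. bootsnap, if present) and
# confirms the suite is green before any timed runs.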
| echo "── Warmup (PARALLEL_WORKERS=$MAX_CORES, SEED=$SEED) ──" | |
| WARMUP_LOG="$RESULTS_DIR/warmup.log" | |
| if PARALLEL_WORKERS=$MAX_CORES RAILS_ENV=test bin/rails test --seed $SEED > "$WARMUP_LOG" 2>&1; then | |
| echo " Warmup complete. ✓" | |
| else | |
| echo " Warmup FAILED. ✗ — see $WARMUP_LOG" | |
| echo " Aborting: tests must pass before benchmarking." | |
| exit 1 | |
| fi | |
| echo "" | |
| # ── CSV header ────────────────────────────────────────────────────── | |
| echo "workers,sample,seconds" > "$RESULTS_FILE" | |
| # ── Run benchmarks ────────────────────────────────────────────────── | |
| declare -A SAMPLE_COUNT | |
| RUN=0 | |
| for workers in "${SHUFFLED[@]}"; do | |
| RUN=$((RUN + 1)) | |
| # Track which sample number this is for the worker count | |
| KEY="w${workers}" | |
| SAMPLE_COUNT[$KEY]=$(( ${SAMPLE_COUNT[$KEY]:-0} + 1 )) | |
| SAMPLE_NUM=${SAMPLE_COUNT[$KEY]} | |
| printf "[%3d/%d] workers=%-3d sample=%d ... " "$RUN" "$TOTAL_RUNS" "$workers" "$SAMPLE_NUM" | |
| RUN_LOG="$RESULTS_DIR/run_w${workers}_s${SAMPLE_NUM}.log" | |
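  # date +%s%N gives wall-clock nanoseconds; bc converts the delta to seconds
  # with two decimal places.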
  START=$(date +%s%N)
  if PARALLEL_WORKERS=$workers RAILS_ENV=test bin/rails test --seed $SEED > "$RUN_LOG" 2>&1; then
    END=$(date +%s%N)
    ELAPSED_NS=$((END - START))
    ELAPSED_S=$(echo "scale=2; $ELAPSED_NS / 1000000000" | bc)
    echo "${ELAPSED_S}s ✓"
    echo "$workers,$SAMPLE_NUM,$ELAPSED_S" >> "$RESULTS_FILE"
  else
    END=$(date +%s%N)
    ELAPSED_NS=$((END - START))
    ELAPSED_S=$(echo "scale=2; $ELAPSED_NS / 1000000000" | bc)
    echo "FAILED (${ELAPSED_S}s) ✗ — see $RUN_LOG"
    echo "$workers,$SAMPLE_NUM,FAILED" >> "$RESULTS_FILE"
  fi
done
# ── Summary ─────────────────────────────────────────────────────────
echo ""
echo "═══════════════════════════════════════════════════════════════"
echo " Results (median of $SAMPLES runs, seed=$SEED)"
echo "═══════════════════════════════════════════════════════════════"
echo ""
printf "%-10s %-10s %-10s %-10s %-10s\n" "Workers" "Median" "Min" "Max" "Spread"
printf "%-10s %-10s %-10s %-10s %-10s\n" "-------" "------" "---" "---" "------"
# Parse CSV and compute stats per worker count (rows printed in worker-count order)
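# Requires GNU awk (gawk): asorti() and the data[w][i] arrays-of-arrays syntax
# are gawk extensions not available in mawk or BSD awk.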
tail -n +2 "$RESULTS_FILE" | grep -v FAILED | sort -t, -k1 -n | awk -F, '
  {
    w = $1
    t = $3
    data[w][++count[w]] = t
  }
  END {
    # Sort worker counts
    n = asorti(count, sorted, "@ind_num_asc")
    best_median = 999999
    best_workers = 0
    for (i = 1; i <= n; i++) {
      w = sorted[i]
      c = count[w]
      # Sort samples for this worker count (bubble sort, c is small)
      for (a = 1; a <= c; a++)
        for (b = a + 1; b <= c; b++)
          if (data[w][a] > data[w][b]) {
            tmp = data[w][a]
            data[w][a] = data[w][b]
            data[w][b] = tmp
          }
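      # With the default 3 samples this index is the true median; for an even
      # sample count it picks the lower of the two middle values.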
      median = data[w][int((c + 1) / 2)]
      min_v = data[w][1]
      max_v = data[w][c]
      spread = max_v - min_v
      if (median < best_median) {
        best_median = median
        best_workers = w
      }
      printf "%-10d %-10.2f %-10.2f %-10.2f %-10.2f\n", w, median, min_v, max_v, spread
    }
    print ""
    printf "★ Optimal: %d workers (%.2fs median)\n", best_workers, best_median
  }
'
echo ""
echo "Raw data: $RESULTS_FILE"