Skip to content

Instantly share code, notes, and snippets.

@robzolkos
Created February 12, 2026 13:56
Show Gist options
  • Select an option

  • Save robzolkos/b37977087df89c05a97a3a206d0fe66a to your computer and use it in GitHub Desktop.

Select an option

Save robzolkos/b37977087df89c05a97a3a206d0fe66a to your computer and use it in GitHub Desktop.
Benchmark Rails parallel test workers to find the optimal core count for your machine
#!/usr/bin/env bash
#
# bin/parallel_bench
#
# Benchmarks your Rails parallel test suite across 1..N workers (N = core count).
# Runs each worker count 3 times in randomized order, reports median.
#
# Uses PARALLEL_WORKERS env var (Rails 6+ built-in) to control parallelism.
# Fixed seed ensures identical test ordering across all runs.
# Randomized execution order eliminates cache warmth bias.
#
# Prerequisites:
# - Test databases already created and schema-loaded
# - Tests passing at full parallelism
#
# Usage:
# bin/parallel_bench # full run: 1..nproc, 3 samples each
# bin/parallel_bench 4 16 # custom range: 4..16
# bin/parallel_bench 8 8 # single worker count, 3 samples
#
# macOS: requires `brew install coreutils` for gshuf, or replace:
# nproc → sysctl -n hw.ncpu
# free -h → sysctl -n hw.memsize | awk '{printf "%.0fGi", $1/1073741824}'
# shuf → gshuf (from coreutils)
# date +%s%N → ruby -e 'puts (Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1e9).to_i'
# awk → gawk (brew install gawk) — the summary uses GNU awk extensions
#   (asorti, arrays of arrays) that stock BSD awk does not support
#
set -euo pipefail

# ── Configuration ───────────────────────────────────────────────────
# Args: $1 = min worker count (default 1), $2 = max worker count (default nproc).
MAX_CORES=$(nproc)
MIN_WORKERS=${1:-1}
MAX_WORKERS=${2:-$MAX_CORES}

# Fail fast on non-numeric or inverted ranges instead of silently
# producing an empty run plan further down.
[[ "$MIN_WORKERS" =~ ^[0-9]+$ && "$MAX_WORKERS" =~ ^[0-9]+$ ]] \
  || { echo "Usage: $0 [min_workers] [max_workers] (positive integers)" >&2; exit 2; }
(( MIN_WORKERS >= 1 && MIN_WORKERS <= MAX_WORKERS )) \
  || { echo "Error: need 1 <= min ($MIN_WORKERS) <= max ($MAX_WORKERS)" >&2; exit 2; }

readonly SAMPLES=3                 # runs per worker count; median is reported
SEED=$((RANDOM % 99999))           # fixed for the whole session → identical test order
RESULTS_DIR="tmp/parallel_bench"
RESULTS_FILE="$RESULTS_DIR/results_$(date +%Y%m%d_%H%M%S).csv"
mkdir -p "$RESULTS_DIR"
# ── System context ──────────────────────────────────────────────────
# Print hardware/runtime info alongside results so runs on different
# machines can be compared later. Each external probe falls back to
# 'n/a' rather than printing a blank field (previously only MySQL/PG
# had that fallback; the Rails probe now matches).
echo "═══════════════════════════════════════════════════════════════"
echo " Parallel Worker Benchmark"
echo "═══════════════════════════════════════════════════════════════"
echo ""
echo "System:"
echo " Cores: $MAX_CORES"
echo " RAM: $(free -h | awk '/^Mem:/{print $2}')"
echo " Ruby: $(ruby -v)"
echo " Rails: $(RAILS_ENV=test bin/rails runner 'puts Rails.version' 2>/dev/null || echo 'n/a')"
echo " MySQL: $(mysql --version 2>/dev/null || echo 'n/a')"
echo " PG: $(psql --version 2>/dev/null || echo 'n/a')"
echo ""
echo "Config:"
echo " Workers: $MIN_WORKERS..$MAX_WORKERS"
echo " Samples: $SAMPLES per worker count"
echo " Seed: $SEED"
echo " Results: $RESULTS_FILE"
echo ""
# ── Build randomized run plan ───────────────────────────────────────
# Emit each worker count SAMPLES times, then shuffle the whole list so
# samples for a given count are spread across the session (eliminates
# cache-warmth bias from running all samples of one count back to back).
PLAN=()
for (( w = MIN_WORKERS; w <= MAX_WORKERS; w++ )); do
  for (( s = 1; s <= SAMPLES; s++ )); do
    PLAN+=("$w")
  done
done

# "${PLAN[@]}" on an empty array trips `set -u` in bash < 4.4 — bail early.
(( ${#PLAN[@]} > 0 )) || { echo "Error: empty run plan ($MIN_WORKERS..$MAX_WORKERS)" >&2; exit 1; }

# mapfile avoids the word-splitting/globbing pitfalls of SHUFFLED=($(...)).
mapfile -t SHUFFLED < <(printf '%s\n' "${PLAN[@]}" | shuf)
TOTAL_RUNS=${#SHUFFLED[@]}
echo "Total runs: $TOTAL_RUNS (estimated $(( TOTAL_RUNS / 3 )) - $(( TOTAL_RUNS / 2 )) minutes)"
echo ""
# ── Warmup ──────────────────────────────────────────────────────────
# One full-parallelism pass before any timing: primes caches and proves
# the suite is green — there is no point benchmarking a failing suite.
echo "── Warmup (PARALLEL_WORKERS=$MAX_CORES, SEED=$SEED) ──"
WARMUP_LOG="$RESULTS_DIR/warmup.log"
if ! PARALLEL_WORKERS=$MAX_CORES RAILS_ENV=test bin/rails test --seed "$SEED" > "$WARMUP_LOG" 2>&1; then
  echo " Warmup FAILED. ✗ — see $WARMUP_LOG"
  echo " Aborting: tests must pass before benchmarking."
  exit 1
fi
echo " Warmup complete. ✓"
echo ""
# ── CSV header ──────────────────────────────────────────────────────
echo "workers,sample,seconds" > "$RESULTS_FILE"

# ── Run benchmarks ──────────────────────────────────────────────────
# Each planned run: execute the suite at the given worker count, time it
# with nanosecond wall clock, append a CSV row (FAILED rows keep the
# worker/sample columns so failures remain visible in the raw data).
declare -A SAMPLE_COUNT
RUN=0
for workers in "${SHUFFLED[@]}"; do
  RUN=$((RUN + 1))
  # Track which sample number this is for the worker count.
  KEY="w${workers}"
  SAMPLE_COUNT[$KEY]=$(( ${SAMPLE_COUNT[$KEY]:-0} + 1 ))
  SAMPLE_NUM=${SAMPLE_COUNT[$KEY]}
  printf "[%3d/%d] workers=%-3d sample=%d ... " "$RUN" "$TOTAL_RUNS" "$workers" "$SAMPLE_NUM"
  RUN_LOG="$RESULTS_DIR/run_w${workers}_s${SAMPLE_NUM}.log"

  # Capture exit status without tripping `set -e`, then stop the clock
  # exactly once — the old version duplicated the timing arithmetic in
  # both branches of the if/else.
  START=$(date +%s%N)
  STATUS=0
  PARALLEL_WORKERS=$workers RAILS_ENV=test bin/rails test --seed "$SEED" > "$RUN_LOG" 2>&1 || STATUS=$?
  END=$(date +%s%N)
  ELAPSED_S=$(echo "scale=2; ($END - $START) / 1000000000" | bc)

  if (( STATUS == 0 )); then
    echo "${ELAPSED_S}s ✓"
    echo "$workers,$SAMPLE_NUM,$ELAPSED_S" >> "$RESULTS_FILE"
  else
    echo "FAILED (${ELAPSED_S}s) ✗ — see $RUN_LOG"
    echo "$workers,$SAMPLE_NUM,FAILED" >> "$RESULTS_FILE"
  fi
done
# ── Summary ─────────────────────────────────────────────────────────
echo ""
echo "═══════════════════════════════════════════════════════════════"
echo " Results (median of $SAMPLES runs, seed=$SEED)"
echo "═══════════════════════════════════════════════════════════════"
echo ""
printf "%-10s %-10s %-10s %-10s %-10s\n" "Workers" "Median" "Min" "Max" "Spread"
printf "%-10s %-10s %-10s %-10s %-10s\n" "-------" "------" "---" "---" "------"
# Per-worker-count stats, sorted by worker count. Header/FAILED filtering
# happens INSIDE awk: the previous `tail | grep -v FAILED | sort | awk`
# pipeline exited non-zero under `pipefail` when every run failed (grep
# matches nothing → status 1), so `set -e` killed the script before the
# summary printed. The pre-sort was redundant — asorti orders the keys.
# Requires GNU awk (asorti, arrays of arrays); see header note for macOS.
awk -F, '
NR == 1 { next }            # skip CSV header row
$3 == "FAILED" { next }     # failed runs carry no timing data
{
  data[$1][++count[$1]] = $3
}
END {
  # Worker counts in ascending numeric order
  n = asorti(count, sorted, "@ind_num_asc")
  if (n == 0) {
    print "No successful runs recorded — nothing to summarize."
    exit
  }
  best_median = 999999
  best_workers = 0
  for (i = 1; i <= n; i++) {
    w = sorted[i]
    c = count[w]
    # Sort this worker count-s samples (bubble sort; c == SAMPLES, tiny)
    for (a = 1; a <= c; a++)
      for (b = a + 1; b <= c; b++)
        if (data[w][a] > data[w][b]) {
          tmp = data[w][a]
          data[w][a] = data[w][b]
          data[w][b] = tmp
        }
    median = data[w][int((c + 1) / 2)]
    min_v = data[w][1]
    max_v = data[w][c]
    spread = max_v - min_v
    if (median < best_median) {
      best_median = median
      best_workers = w
    }
    printf "%-10d %-10.2f %-10.2f %-10.2f %-10.2f\n", w, median, min_v, max_v, spread
  }
  print ""
  printf "★ Optimal: %d workers (%.2fs median)\n", best_workers, best_median
}
' "$RESULTS_FILE"
echo ""
echo "Raw data: $RESULTS_FILE"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment