Benchmark Rails parallel test workers to find the optimal core count for your machine
#!/usr/bin/env bash
#
# bin/parallel_bench
#
# Benchmarks your Rails parallel test suite across 1..N workers (N = core count).
# Runs each worker count 3 times in randomized order, reports median.
#
# Uses PARALLEL_WORKERS env var (Rails 6+ built-in) to control parallelism.
# Fixed seed ensures identical test ordering across all runs.
# Randomized execution order eliminates cache warmth bias.
#
# Prerequisites:
#   - Test databases already created and schema-loaded
#   - Tests passing at full parallelism
#
# Usage:
#   bin/parallel_bench        # full run: 1..nproc, 3 samples each
#   bin/parallel_bench 4 16   # custom range: 4..16
#   bin/parallel_bench 8 8    # single worker count, 3 samples
#
# macOS: requires `brew install coreutils` for gshuf, or replace:
#   nproc       → sysctl -n hw.ncpu
#   free -h     → sysctl -n hw.memsize | awk '{printf "%.0fGi", $1/1073741824}'
#   shuf        → gshuf (from coreutils)
#   date +%s%N  → ruby -e 'puts (Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1e9).to_i'
#
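# Portability sketch (commented out; an assumption, not part of the original
# workflow): helpers like these let the same body run on Linux and macOS.
# Note that `declare -A` below needs bash 4+; macOS ships bash 3.2 at /bin/bash,
# so invoke this script with a newer bash (e.g. Homebrew's) there.
#
#   core_count() { command -v nproc >/dev/null 2>&1 && nproc || sysctl -n hw.ncpu; }
#   shuffle()    { command -v shuf >/dev/null 2>&1 && shuf || gshuf; }
#   now_ns()     { ruby -e "puts (Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1e9).to_i"; }
#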
set -euo pipefail
MAX_CORES=$(nproc)
MIN_WORKERS=${1:-1}
MAX_WORKERS=${2:-$MAX_CORES}
SAMPLES=3
SEED=$((RANDOM % 99999))
RESULTS_DIR="tmp/parallel_bench"
RESULTS_FILE="$RESULTS_DIR/results_$(date +%Y%m%d_%H%M%S).csv"
mkdir -p "$RESULTS_DIR"
# ── System context ──────────────────────────────────────────────────
echo "═══════════════════════════════════════════════════════════════"
echo " Parallel Worker Benchmark"
echo "═══════════════════════════════════════════════════════════════"
echo ""
echo "System:"
echo " Cores: $MAX_CORES"
echo " RAM: $(free -h | awk '/^Mem:/{print $2}')"
echo " Ruby: $(ruby -v)"
echo " Rails: $(RAILS_ENV=test bin/rails runner 'puts Rails.version' 2>/dev/null)"
echo " MySQL: $(mysql --version 2>/dev/null || echo 'n/a')"
echo " PG: $(psql --version 2>/dev/null || echo 'n/a')"
echo ""
echo "Config:"
echo " Workers: $MIN_WORKERS..$MAX_WORKERS"
echo " Samples: $SAMPLES per worker count"
echo " Seed: $SEED"
echo " Results: $RESULTS_FILE"
echo ""
# ── Build randomized run plan ───────────────────────────────────────
PLAN=()
for workers in $(seq "$MIN_WORKERS" "$MAX_WORKERS"); do
  for sample in $(seq 1 $SAMPLES); do
    PLAN+=("$workers")
  done
done
# Shuffle the plan
SHUFFLED=($(printf '%s\n' "${PLAN[@]}" | shuf))
TOTAL_RUNS=${#SHUFFLED[@]}
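# The estimate below assumes each full suite run takes roughly 20-30 seconds;
# scale it for your own suite's runtime.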
| echo "Total runs: $TOTAL_RUNS (estimated $(( TOTAL_RUNS / 3 )) - $(( TOTAL_RUNS / 2 )) minutes)" | |
| echo "" | |
| # ── Warmup ────────────────────────────────────────────────────────── | |
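# One full-parallelism pass primes caches (e.g. bootsnap, if present) and
# confirms the suite is green before any timed runs.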
| echo "── Warmup (PARALLEL_WORKERS=$MAX_CORES, SEED=$SEED) ──" | |
| WARMUP_LOG="$RESULTS_DIR/warmup.log" | |
| if PARALLEL_WORKERS=$MAX_CORES RAILS_ENV=test bin/rails test --seed $SEED > "$WARMUP_LOG" 2>&1; then | |
| echo " Warmup complete. ✓" | |
| else | |
| echo " Warmup FAILED. ✗ — see $WARMUP_LOG" | |
| echo " Aborting: tests must pass before benchmarking." | |
| exit 1 | |
| fi | |
| echo "" | |
| # ── CSV header ────────────────────────────────────────────────────── | |
| echo "workers,sample,seconds" > "$RESULTS_FILE" | |
| # ── Run benchmarks ────────────────────────────────────────────────── | |
| declare -A SAMPLE_COUNT | |
| RUN=0 | |
| for workers in "${SHUFFLED[@]}"; do | |
| RUN=$((RUN + 1)) | |
| # Track which sample number this is for the worker count | |
| KEY="w${workers}" | |
| SAMPLE_COUNT[$KEY]=$(( ${SAMPLE_COUNT[$KEY]:-0} + 1 )) | |
| SAMPLE_NUM=${SAMPLE_COUNT[$KEY]} | |
| printf "[%3d/%d] workers=%-3d sample=%d ... " "$RUN" "$TOTAL_RUNS" "$workers" "$SAMPLE_NUM" | |
| RUN_LOG="$RESULTS_DIR/run_w${workers}_s${SAMPLE_NUM}.log" | |
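  # date +%s%N gives wall-clock nanoseconds; bc converts the delta to seconds
  # with two decimal places.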
  START=$(date +%s%N)
  if PARALLEL_WORKERS=$workers RAILS_ENV=test bin/rails test --seed $SEED > "$RUN_LOG" 2>&1; then
    END=$(date +%s%N)
    ELAPSED_NS=$((END - START))
    ELAPSED_S=$(echo "scale=2; $ELAPSED_NS / 1000000000" | bc)
    echo "${ELAPSED_S}s ✓"
    echo "$workers,$SAMPLE_NUM,$ELAPSED_S" >> "$RESULTS_FILE"
  else
    END=$(date +%s%N)
    ELAPSED_NS=$((END - START))
    ELAPSED_S=$(echo "scale=2; $ELAPSED_NS / 1000000000" | bc)
    echo "FAILED (${ELAPSED_S}s) ✗ — see $RUN_LOG"
    echo "$workers,$SAMPLE_NUM,FAILED" >> "$RESULTS_FILE"
  fi
done
# ── Summary ─────────────────────────────────────────────────────────
echo ""
echo "═══════════════════════════════════════════════════════════════"
echo " Results (median of $SAMPLES runs, seed=$SEED)"
echo "═══════════════════════════════════════════════════════════════"
echo ""
printf "%-10s %-10s %-10s %-10s %-10s\n" "Workers" "Median" "Min" "Max" "Spread"
printf "%-10s %-10s %-10s %-10s %-10s\n" "-------" "------" "---" "---" "------"
# Parse CSV and compute stats per worker count (rows printed in worker-count order)
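# Requires GNU awk (gawk): asorti() and the data[w][i] arrays-of-arrays syntax
# are gawk extensions not available in mawk or BSD awk.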
tail -n +2 "$RESULTS_FILE" | grep -v FAILED | sort -t, -k1 -n | awk -F, '
  {
    w = $1
    t = $3
    data[w][++count[w]] = t
  }
  END {
    # Sort worker counts
    n = asorti(count, sorted, "@ind_num_asc")
    best_median = 999999
    best_workers = 0
    for (i = 1; i <= n; i++) {
      w = sorted[i]
      c = count[w]
      # Sort samples for this worker count (bubble sort, c is small)
      for (a = 1; a <= c; a++)
        for (b = a + 1; b <= c; b++)
          if (data[w][a] > data[w][b]) {
            tmp = data[w][a]
            data[w][a] = data[w][b]
            data[w][b] = tmp
          }
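      # With the default 3 samples this index is the true median; for an even
      # sample count it picks the lower of the two middle values.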
      median = data[w][int((c + 1) / 2)]
      min_v = data[w][1]
      max_v = data[w][c]
      spread = max_v - min_v
      if (median < best_median) {
        best_median = median
        best_workers = w
      }
      printf "%-10d %-10.2f %-10.2f %-10.2f %-10.2f\n", w, median, min_v, max_v, spread
    }
    print ""
    printf "★ Optimal: %d workers (%.2fs median)\n", best_workers, best_median
  }
'
echo ""
echo "Raw data: $RESULTS_FILE"