Last active
February 11, 2026 11:38
-
-
Save YuriyGuts/3450c33ad23f89b5308fefd0c5b1b762 to your computer and use it in GitHub Desktop.
Optimal randomized binary search strategy for Steve Ballmer's guessing game (https://www.youtube.com/watch?v=svCYbkS0Sjk)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Optimal randomized binary search strategy for Steve Ballmer's guessing game. | |
| Game: opponent picks `x` in `[1, 100]`, you guess with higher/lower feedback. | |
| Payoff = $6 - (number of guesses). Profit if you find it in 5 guesses or fewer. | |
| Solves the minimax LP over a pool of deterministic BST strategies to find | |
| the optimal randomized mixture. Verifies via Monte Carlo. | |
| Usage | |
| ----- | |
| The script requires `numpy` and `scipy`. | |
| You can run via `uv` to install all dependencies automatically: | |
| uv run ballmer_guessing_game.py | |
| Analysis mode: | |
| python ballmer_guessing_game.py | |
| Interactive play mode: | |
| python ballmer_guessing_game.py --play | |
| """ | |
| # /// script | |
| # dependencies = ["numpy", "scipy"] | |
| # /// | |
| import sys | |
| import time | |
| from collections import defaultdict | |
| from dataclasses import dataclass | |
| import numpy as np | |
| from scipy.optimize import linprog | |
# The maximum number the opponent can pick (valid targets are 1..RANGE_SIZE).
RANGE_SIZE = 100
# You get the reward of $(PAYOFF_BASELINE - num_guesses).
# With a baseline of 6, finding the number in 5 guesses or fewer is profitable.
PAYOFF_BASELINE = 6
# The threshold for treating a strategy weight as zero.
# LP solutions carry floating-point noise; weights below this are discarded.
NEGLIGIBLE_PROBABILITY = 1e-10
@dataclass
class BSTNode:
    """A single node in a binary search tree.

    Attributes
    ----------
    value
        The number stored (guessed) at this node.
    left
        Subtree of values smaller than `value`, or None.
    right
        Subtree of values greater than `value`, or None.
    """

    value: int
    # The self-referential annotations are quoted: without
    # `from __future__ import annotations`, an unquoted `BSTNode` is evaluated
    # while the class body is still executing and raises NameError on
    # Python < 3.14 (before PEP 649 deferred annotation evaluation).
    left: "BSTNode | None" = None
    right: "BSTNode | None" = None
class BinarySearchTree:
    """A navigable binary search tree built from a depth lookup."""

    def __init__(self, root: BSTNode) -> None:
        self.root = root

    @classmethod
    def from_depth_lookup(cls, depth_lookup: dict[int, int]) -> "BinarySearchTree":
        """Reconstruct a BST from a target->depth mapping.

        Nodes are inserted in breadth-first order (shallowest depth first,
        ties broken by value), which places each node at its recorded depth
        provided `depth_lookup` describes a consistent BST.

        Parameters
        ----------
        depth_lookup
            Mapping from each stored value to its 1-based depth in the tree.

        Returns
        -------
        BinarySearchTree
            The reconstructed, navigable tree.
        """
        # NOTE: the return annotation is quoted because the class is still
        # being defined when this method's annotation would be evaluated;
        # an unquoted name raises NameError on Python < 3.14 without
        # `from __future__ import annotations`.
        items_by_depth = sorted(depth_lookup.items(), key=lambda pair: (pair[1], pair[0]))
        root_node = BSTNode(items_by_depth[0][0])
        for value, _ in items_by_depth[1:]:
            new_node = BSTNode(value)
            current = root_node
            # Standard BST insertion: walk down until an empty slot is found.
            while True:
                if value < current.value:
                    if current.left is None:
                        current.left = new_node
                        break
                    current = current.left
                else:
                    if current.right is None:
                        current.right = new_node
                        break
                    current = current.right
        return cls(root_node)
class Strategy:
    """A deterministic BST guessing strategy backed by a target-to-depth mapping."""

    def __init__(self, depth_lookup: dict[int, int]) -> None:
        self._depth_lookup = depth_lookup

    def depth_for(self, target: int) -> int:
        """Return the number of guesses needed to find *target*."""
        return self._depth_lookup[target]

    def signature(self) -> tuple[int, ...]:
        """Return a tuple of depths for targets `1..RANGE_SIZE`, usable as a deduplication key."""
        # Targets absent from the lookup report depth 0.
        lookup = self._depth_lookup
        return tuple(lookup.get(candidate, 0) for candidate in range(1, RANGE_SIZE + 1))

    def to_search_tree(self) -> BinarySearchTree:
        """Build a navigable `BinarySearchTree` from this strategy's depth lookup."""
        return BinarySearchTree.from_depth_lookup(self._depth_lookup)
def extract_active_strategy_distribution(
    mixture_probabilities: np.ndarray,
) -> tuple[np.ndarray, np.ndarray]:
    """
    Extract indices and renormalized probabilities of non-negligible strategies.

    Parameters
    ----------
    mixture_probabilities
        Full probability vector over all strategies.

    Returns
    -------
    indices
        Indices of strategies with weight above `NEGLIGIBLE_PROBABILITY`.
    probabilities
        Corresponding probabilities, renormalized to sum to 1.
    """
    active_mask = mixture_probabilities > NEGLIGIBLE_PROBABILITY
    active_indices = np.flatnonzero(active_mask)
    # Copy before normalizing so the caller's vector is left untouched.
    active_weights = mixture_probabilities[active_indices].copy()
    active_weights /= active_weights.sum()
    return active_indices, active_weights
def compute_depths_using_balanced_pivots(
    range_low: int,
    range_high: int,
    current_depth: int = 1,
) -> dict[int, int]:
    """
    Build a balanced BST (always pivot on the median) over an integer range.

    Parameters
    ----------
    range_low
        Lower bound of the range (inclusive).
    range_high
        Upper bound of the range (inclusive).
    current_depth
        Depth assigned to the root of this subtree.

    Returns
    -------
    depths
        Mapping from each target value in `[range_low, range_high]` to its
        search depth in the balanced BST.
    """
    depths: dict[int, int] = {}
    # Iterative formulation of the median-split recursion: every stack entry
    # is a (low, high, depth) subrange still awaiting its pivot.
    pending = [(range_low, range_high, current_depth)]
    while pending:
        low, high, depth = pending.pop()
        if low > high:
            continue
        pivot = (low + high) // 2
        depths[pivot] = depth
        pending.append((low, pivot - 1, depth + 1))
        pending.append((pivot + 1, high, depth + 1))
    return depths
def compute_depths_with_chosen_root(
    range_low: int,
    range_high: int,
    chosen_root: int,
    current_depth: int = 1,
) -> dict[int, int]:
    """
    Build a BST with a specified root pivot, then balanced splits below.

    Parameters
    ----------
    range_low
        Lower bound of the range (inclusive).
    range_high
        Upper bound of the range (inclusive).
    chosen_root
        The pivot value to use at the root of this subtree.
        Clamped to `[range_low, range_high]` if out of bounds.
    current_depth
        Depth assigned to the root of this subtree.

    Returns
    -------
    depths
        Mapping from each target value in [range_low, range_high] to its search depth.
    """
    if range_low > range_high:
        return {}
    # Clamp the requested pivot into the valid range.
    pivot = min(max(chosen_root, range_low), range_high)
    left_depths = compute_depths_using_balanced_pivots(range_low, pivot - 1, current_depth + 1)
    right_depths = compute_depths_using_balanced_pivots(pivot + 1, range_high, current_depth + 1)
    # Keys are disjoint, so merge order does not matter.
    return {pivot: current_depth, **left_depths, **right_depths}
def choose_pivot_candidates(range_low: int, range_high: int) -> list[int]:
    """
    Select a small diverse set of pivot candidates for a given range.

    Includes the median, a few values near it, and the two endpoints.

    Parameters
    ----------
    range_low
        Lower bound of the range (inclusive).
    range_high
        Upper bound of the range (inclusive).

    Returns
    -------
    candidates
        Sorted list of candidate pivot values.
    """
    if range_high < range_low:
        return []
    median = (range_low + range_high) // 2
    # Median plus nearby offsets, clipped to the range; the set removes
    # duplicates when the range is small.
    near_median = (median + offset for offset in (-2, -1, 0, 1, 2))
    candidates = {value for value in near_median if range_low <= value <= range_high}
    # Always include both endpoints.
    candidates.update((range_low, range_high))
    return sorted(candidates)
def generate_all_strategies() -> tuple[np.ndarray, list[Strategy]]:
    """
    Generate a diverse pool of deterministic BST strategies.

    Varies the pivot choice at the root (level 1) and at both children
    (level 2), then uses balanced (median) splits for everything below.
    Deduplicates strategies that produce identical depth profiles.

    Returns
    -------
    depth_matrix
        Shape `(num_strategies, RANGE_SIZE)`.
        `depth_matrix[i, x - 1]` is the number of guesses strategy `i` needs to find target `x`.
    strategies
        Each element is a `Strategy` wrapping a depth lookup for one BST.
    """
    print("Generating strategies...")
    strategies: list[Strategy] = []
    seen_signatures: set[tuple[int, ...]] = set()
    # `None` stands in for an empty subtree when the root sits at an endpoint.
    no_choice: list[int | None] = [None]
    for root_pivot in range(1, RANGE_SIZE + 1):
        left_options = choose_pivot_candidates(1, root_pivot - 1) or no_choice
        right_options = choose_pivot_candidates(root_pivot + 1, RANGE_SIZE) or no_choice
        # Enumerate every left/right level-2 pivot combination for this root.
        for left_pivot in left_options:
            for right_pivot in right_options:
                depth_lookup: dict[int, int] = {root_pivot: 1}
                if left_pivot is not None:
                    depth_lookup.update(
                        compute_depths_with_chosen_root(
                            range_low=1,
                            range_high=root_pivot - 1,
                            chosen_root=left_pivot,
                            current_depth=2,
                        )
                    )
                if right_pivot is not None:
                    depth_lookup.update(
                        compute_depths_with_chosen_root(
                            range_low=root_pivot + 1,
                            range_high=RANGE_SIZE,
                            chosen_root=right_pivot,
                            current_depth=2,
                        )
                    )
                # Keep only strategies whose depth profile is new.
                candidate = Strategy(depth_lookup)
                candidate_signature = candidate.signature()
                if candidate_signature not in seen_signatures:
                    seen_signatures.add(candidate_signature)
                    strategies.append(candidate)
    print(f" {len(strategies)} unique strategies generated")
    print()
    depth_matrix = np.array(
        [
            [strategy.depth_for(target) for target in range(1, RANGE_SIZE + 1)]
            for strategy in strategies
        ]
    )
    return depth_matrix, strategies
def solve_for_optimal_mixture(depth_matrix: np.ndarray) -> tuple[np.ndarray, float]:
    """
    Find the optimal randomized strategy via linear programming.

    Solves the minimax LP: find a probability distribution over strategies that minimizes
    the worst-case expected number of guesses across all possible targets.

    Formulation:
        minimize worst_case_depth
        subject to for each target x:
            sum_i probability[i] * depth_matrix[i, x] <= worst_case_depth
            sum_i probability[i] = 1
            probability[i] >= 0

    Parameters
    ----------
    depth_matrix
        Shape `(num_strategies, num_targets)`.
        `depth_matrix[i, x]` is the number of guesses strategy `i`
        requires to find target `x + 1`.

    Returns
    -------
    mixture_probabilities
        Optimal probability assigned to each strategy. Sums to 1.
        Most entries are zero; only a sparse subset of strategies is active.
    minimax_expected_depth
        The guaranteed worst-case expected number of guesses under the optimal mixture.
        Payoff per game is `PAYOFF_BASELINE - minimax_expected_depth`.
    """
    num_strategies, num_targets = depth_matrix.shape
    print(f"Solving LP ({num_strategies} strategies x {num_targets} targets)...")
    # Decision vector: strategy probabilities followed by `worst_case_depth`.
    # The objective selects only the final variable.
    objective_coefficients = np.append(np.zeros(num_strategies), 1.0)
    # One inequality row per target x:
    #   sum_i prob_i * depth[i, x] - worst_case_depth <= 0
    inequality_matrix = np.hstack([depth_matrix.T, -np.ones((num_targets, 1))])
    inequality_bounds = np.zeros(num_targets)
    # Probabilities must sum to 1; `worst_case_depth` gets coefficient zero.
    equality_matrix = np.append(np.ones(num_strategies), 0.0).reshape(1, -1)
    equality_bounds = np.array([1.0])
    # Probabilities are non-negative; `worst_case_depth` is free.
    variable_bounds = [(0, None)] * num_strategies + [(None, None)]
    started_at = time.perf_counter()
    result = linprog(
        objective_coefficients,
        A_ub=inequality_matrix,
        b_ub=inequality_bounds,
        A_eq=equality_matrix,
        b_eq=equality_bounds,
        bounds=variable_bounds,
        method="highs",
    )
    elapsed = time.perf_counter() - started_at
    if not result.success:
        raise RuntimeError(f"LP solver failed: {result.message}")
    mixture_probabilities = result.x[:num_strategies]
    minimax_expected_depth: float = result.x[-1]
    # Zero out floating-point noise, then renormalize to a proper distribution.
    mixture_probabilities[mixture_probabilities < NEGLIGIBLE_PROBABILITY] = 0
    mixture_probabilities /= mixture_probabilities.sum()
    num_active_strategies = int(np.sum(mixture_probabilities > 0))
    expected_payoff = PAYOFF_BASELINE - minimax_expected_depth
    print(f" Solved in {elapsed:.3f}s")
    print(f" Minimax expected guesses: {minimax_expected_depth:.6f}")
    print(f" Expected payoff: ${expected_payoff:.6f}/game")
    print(f" Active trees in mixture: {num_active_strategies}")
    return mixture_probabilities, minimax_expected_depth
def analyze_mixture(
    depth_matrix: np.ndarray,
    mixture_probabilities: np.ndarray,
) -> None:
    """
    Print diagnostics for the optimal mixture.

    Shows the per-target expected depth profile (worst/best targets) and
    the full depth probability distribution for the hardest target.

    Parameters
    ----------
    depth_matrix
        Depth matrix as returned by `generate_all_strategies`.
    mixture_probabilities
        Optimal mixture weights as returned by `solve_for_optimal_mixture`.
    """
    per_target_depth = mixture_probabilities @ depth_matrix
    worst_target_index = int(np.argmax(per_target_depth))
    best_target_index = int(np.argmin(per_target_depth))
    worst_depth = per_target_depth[worst_target_index]
    best_depth = per_target_depth[best_target_index]
    print()
    print("Expected depth profile:")
    print(f" Minimax equalized at: {worst_depth:.6f}")
    print(
        " Worst target: "
        f"x={worst_target_index + 1:<3d} "
        f"E[guesses]={worst_depth:.6f} "
        f"E[payoff]={PAYOFF_BASELINE - worst_depth:+.4f}"
    )
    print(
        " Best target: "
        f"x={best_target_index + 1:<3d} "
        f"E[guesses]={best_depth:.6f} "
        f"E[payoff]={PAYOFF_BASELINE - best_depth:+.4f}"
    )
    # Aggregate the mixture weight landing on each possible guess count
    # for the hardest target.
    active_indices, _ = extract_active_strategy_distribution(mixture_probabilities)
    depth_mass: dict[int, float] = {}
    for strategy_index in active_indices:
        depth = int(depth_matrix[strategy_index, worst_target_index])
        depth_mass[depth] = depth_mass.get(depth, 0.0) + mixture_probabilities[strategy_index]
    print()
    print(f" Depth distribution for worst target (x={worst_target_index + 1}):")
    for depth in sorted(depth_mass):
        probability = depth_mass[depth]
        payoff = PAYOFF_BASELINE - depth
        bar = "#" * int(probability * 60)
        print(f" guesses={depth}: {probability * 100:6.2f}% ${payoff:+d} {bar}")
def run_monte_carlo_verification(
    depth_matrix: np.ndarray,
    mixture_probabilities: np.ndarray,
    rng: np.random.Generator,
    num_games_for_worst_target: int = 10_000_000,
    num_games_per_target: int = 1_000_000,
) -> None:
    """
    Verify the LP solution via Monte Carlo simulation.

    Plays against a committed adversary who always picks the worst-case
    target (the one with the highest expected depth under the mixture),
    then sweeps all targets to locate the empirically hardest one.

    Parameters
    ----------
    depth_matrix
        Depth matrix as returned by `generate_all_strategies`.
    mixture_probabilities
        Optimal mixture weights as returned by `solve_for_optimal_mixture`.
    rng
        Numpy random generator instance.
    num_games_for_worst_target
        The number of games to simulate for the worst target test.
    num_games_per_target
        The number of games to simulate for the full target sweep test.
    """
    per_target_expected_depth = mixture_probabilities @ depth_matrix
    worst_target_index = int(np.argmax(per_target_expected_depth))
    active_indices, active_probabilities = extract_active_strategy_distribution(
        mixture_probabilities
    )
    print()
    print("Monte Carlo verification:")
    # 1) Focused test: adversary always picks the theoretical worst target.
    strategy_draws = rng.choice(
        active_indices, size=num_games_for_worst_target, p=active_probabilities
    )
    payoffs = PAYOFF_BASELINE - depth_matrix[strategy_draws, worst_target_index]
    mean_payoff = payoffs.mean()
    standard_error = payoffs.std() / np.sqrt(num_games_for_worst_target)
    # Normal-approximation 95% confidence interval.
    half_width = 1.96 * standard_error
    ci_95 = (mean_payoff - half_width, mean_payoff + half_width)
    print(
        f" Focused test vs theoretical worst target "
        f"(x={worst_target_index + 1}), "
        f"{num_games_for_worst_target:,} games:"
    )
    print(f" Mean payoff: ${mean_payoff:.4f} +/- ${standard_error:.4f}")
    print(f" 95% CI: [${ci_95[0]:.4f}, ${ci_95[1]:.4f}]")
    # 2) Full sweep: one shared batch of strategy draws is scored against
    #    every target (common random numbers keep estimates comparable).
    sweep_draws = rng.choice(active_indices, size=num_games_per_target, p=active_probabilities)
    empirical_payoffs = (PAYOFF_BASELINE - depth_matrix[sweep_draws, :]).mean(axis=0)
    hardest_index = int(np.argmin(empirical_payoffs))
    easiest_index = int(np.argmax(empirical_payoffs))
    print()
    print(f" Full sweep across all {RANGE_SIZE} targets ({num_games_per_target:,} games each):")
    print(f" Overall mean payoff: ${empirical_payoffs.mean():.4f}")
    print(f" Worst target: x={hardest_index + 1:<3d} ${empirical_payoffs[hardest_index]:.4f}/game")
    print(f" Best target: x={easiest_index + 1:<3d} ${empirical_payoffs[easiest_index]:.4f}/game")
def play_interactive_games(
    strategy_list: list[Strategy],
    mixture_probabilities: np.ndarray,
    rng: np.random.Generator,
) -> None:
    """
    Run an interactive guessing session using the optimal randomized strategy.

    Each round the user enters a target number. A BST is sampled from the
    optimal mixture and the program walks it to find the target, printing
    each guess along the way. Runs until the user enters "q".

    Parameters
    ----------
    strategy_list
        List of `Strategy` objects as returned by `generate_all_strategies`.
    mixture_probabilities
        Optimal mixture weights as returned by `solve_for_optimal_mixture`.
    rng
        Numpy random generator instance.
    """
    # Restrict sampling to the strategies carrying non-negligible weight.
    active_indices, active_probabilities = extract_active_strategy_distribution(
        mixture_probabilities
    )
    print()
    print("=" * 50)
    print(" GUESS THE NUMBER - Optimal Randomized Strategy")
    print(f" Enter a number 1-{RANGE_SIZE} (or q to quit)")
    print("=" * 50)
    cumulative_payoff = 0
    games_played = 0
    while True:
        raw = input(f"\nGame {games_played + 1}: Enter your number> ").strip().lower()
        if raw == "q":
            print(f"\n {games_played} games played, net ${cumulative_payoff}")
            return
        # Validate the input before starting a round.
        try:
            target = int(raw)
        except ValueError:
            print(f" Please enter an integer 1-{RANGE_SIZE} or q")
            continue
        if target < 1 or target > RANGE_SIZE:
            print(f" Out of range — pick 1-{RANGE_SIZE}")
            continue
        # Sample one deterministic BST from the optimal mixture for this round.
        chosen_strategy_index = rng.choice(active_indices, p=active_probabilities)
        tree = strategy_list[chosen_strategy_index].to_search_tree()
        # Walk the BST toward the target, printing each guess.
        node = tree.root
        guess_count = 0
        while node is not None:
            guess_count += 1
            if node.value == target:
                print(f" #{guess_count}: {node.value}? Correct!")
                break
            elif target < node.value:
                print(f" #{guess_count}: {node.value}? Too high")
                node = node.left
            else:
                print(f" #{guess_count}: {node.value}? Too low")
                node = node.right
        else:
            # The `while`'s else runs only when the walk fell off the tree
            # without a break, i.e. the sampled tree is missing the target.
            print(f" Bug: tree does not contain {target}!")
            continue
        game_payoff = PAYOFF_BASELINE - guess_count
        cumulative_payoff += game_payoff
        games_played += 1
        print(f" Found in {guess_count} guesses! ${game_payoff:+d} (total ${cumulative_payoff})")
def main() -> None:
    """Run the full analysis pipeline and optionally enter interactive mode."""
    rng = np.random.default_rng(42)
    # Pipeline: build the strategy pool, solve the minimax LP, then report
    # diagnostics and a Monte Carlo sanity check.
    depth_matrix, strategy_list = generate_all_strategies()
    mixture_probabilities, minimax_depth = solve_for_optimal_mixture(depth_matrix)
    analyze_mixture(depth_matrix, mixture_probabilities)
    run_monte_carlo_verification(depth_matrix, mixture_probabilities, rng)
    expected_payoff = PAYOFF_BASELINE - minimax_depth
    num_active = int(np.sum(mixture_probabilities > 0))
    print()
    print("=" * 50)
    # The game is worth playing only when the guaranteed EV is positive.
    if expected_payoff > 0:
        print(" VERDICT: PLAY")
        print(f" Guaranteed EV = +${expected_payoff:.4f}/game")
        print(f" Mix over {num_active} deterministic BSTs")
        print()
        print(" Assumptions:")
        print(" Adversary must commit to a target before seeing your random pivot choices")
    else:
        print(" VERDICT: DO NOT PLAY")
        print(f" Best achievable EV = ${expected_payoff:.4f}/game")
        print(" No profitable strategy exists.")
    print("=" * 50)
    if "--play" in sys.argv:
        play_interactive_games(strategy_list, mixture_probabilities, rng)
| if __name__ == "__main__": | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment