@YuriyGuts
Last active February 11, 2026 11:38
Optimal randomized binary search strategy for Steve Ballmer's guessing game (https://www.youtube.com/watch?v=svCYbkS0Sjk)
#!/usr/bin/env python3
"""
Optimal randomized binary search strategy for Steve Ballmer's guessing game.
Game: opponent picks `x` in `[1, 100]`, you guess with higher/lower feedback.
Payoff = $6 - (number of guesses). Profit if you find it in 5 guesses or fewer.
Solves the minimax LP over a pool of deterministic BST strategies to find
the optimal randomized mixture. Verifies via Monte Carlo.
Usage
-----
The script requires `numpy` and `scipy`.
You can run it via `uv`, which installs the dependencies automatically:
uv run ballmer_guessing_game.py
Analysis mode:
python ballmer_guessing_game.py
Interactive play mode:
python ballmer_guessing_game.py --play
"""
# /// script
# dependencies = ["numpy", "scipy"]
# ///
from __future__ import annotations
import sys
import time
from collections import defaultdict
from dataclasses import dataclass
import numpy as np
from scipy.optimize import linprog
# The maximum number the opponent can pick.
RANGE_SIZE = 100
# The payoff for each game is $(PAYOFF_BASELINE - num_guesses).
PAYOFF_BASELINE = 6
# The threshold for treating a strategy weight as zero.
NEGLIGIBLE_PROBABILITY = 1e-10
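# Why a randomized mixture helps (illustrative arithmetic, added for context):
# the median-split tree over 1..100 places 37 targets at depth 7, so a known
# deterministic tree loses $1 per game to an adversary who picks one of them,
# and even a uniformly random target yields only about 6 - 5.8 = $0.20 per game.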
@dataclass
class BSTNode:
"""A single node in a binary search tree."""
value: int
left: BSTNode | None = None
right: BSTNode | None = None
class BinarySearchTree:
"""A navigable binary search tree built from a depth lookup."""
def __init__(self, root: BSTNode) -> None:
self.root = root
@classmethod
def from_depth_lookup(cls, depth_lookup: dict[int, int]) -> BinarySearchTree:
"""Reconstruct BST from target->depth mapping by inserting in breadth-first order."""
items_by_depth = sorted(depth_lookup.items(), key=lambda pair: (pair[1], pair[0]))
root_node = BSTNode(items_by_depth[0][0])
for value, _ in items_by_depth[1:]:
new_node = BSTNode(value)
current = root_node
while True:
if value < current.value:
if current.left is None:
current.left = new_node
break
current = current.left
else:
if current.right is None:
current.right = new_node
break
current = current.right
return cls(root_node)
class Strategy:
"""A deterministic BST guessing strategy backed by a target-to-depth mapping."""
def __init__(self, depth_lookup: dict[int, int]) -> None:
self._depth_lookup = depth_lookup
def depth_for(self, target: int) -> int:
"""Return the number of guesses needed to find *target*."""
return self._depth_lookup[target]
def signature(self) -> tuple[int, ...]:
"""Return a tuple of depths for targets `1..RANGE_SIZE`, usable as a deduplication key."""
return tuple(self._depth_lookup.get(target, 0) for target in range(1, RANGE_SIZE + 1))
def to_search_tree(self) -> BinarySearchTree:
"""Build a navigable `BinarySearchTree` from this strategy's depth lookup."""
return BinarySearchTree.from_depth_lookup(self._depth_lookup)
def extract_active_strategy_distribution(
mixture_probabilities: np.ndarray,
) -> tuple[np.ndarray, np.ndarray]:
"""
Extract indices and renormalized probabilities of non-negligible strategies.
Parameters
----------
mixture_probabilities
Full probability vector over all strategies.
Returns
-------
indices
Indices of strategies with weight above `NEGLIGIBLE_PROBABILITY`.
probabilities
Corresponding probabilities, renormalized to sum to 1.
"""
indices = np.where(mixture_probabilities > NEGLIGIBLE_PROBABILITY)[0]
probabilities = mixture_probabilities[indices].copy()
probabilities /= probabilities.sum()
return indices, probabilities
def compute_depths_using_balanced_pivots(
range_low: int,
range_high: int,
current_depth: int = 1,
) -> dict[int, int]:
"""
Build a balanced BST (always pivot on the median) over an integer range.
Parameters
----------
range_low
Lower bound of the range (inclusive).
range_high
Upper bound of the range (inclusive).
current_depth
Depth assigned to the root of this subtree.
Returns
-------
depths
Mapping from each target value in `[range_low, range_high]` to its
search depth in the balanced BST.
"""
if range_low > range_high:
return {}
median = (range_low + range_high) // 2
depths = {median: current_depth}
depths.update(compute_depths_using_balanced_pivots(range_low, median - 1, current_depth + 1))
depths.update(compute_depths_using_balanced_pivots(median + 1, range_high, current_depth + 1))
return depths
def compute_depths_with_chosen_root(
range_low: int,
range_high: int,
chosen_root: int,
current_depth: int = 1,
) -> dict[int, int]:
"""
Build a BST with a specified root pivot, then balanced splits below.
Parameters
----------
range_low
Lower bound of the range (inclusive).
range_high
Upper bound of the range (inclusive).
chosen_root
The pivot value to use at the root of this subtree.
Clamped to `[range_low, range_high]` if out of bounds.
current_depth
Depth assigned to the root of this subtree.
Returns
-------
depths
Mapping from each target value in [range_low, range_high] to its search depth.
"""
if range_low > range_high:
return {}
chosen_root = max(range_low, min(range_high, chosen_root))
depths = {chosen_root: current_depth}
depths.update(
compute_depths_using_balanced_pivots(range_low, chosen_root - 1, current_depth + 1)
)
depths.update(
compute_depths_using_balanced_pivots(chosen_root + 1, range_high, current_depth + 1)
)
return depths
def choose_pivot_candidates(range_low: int, range_high: int) -> list[int]:
"""
Select a small diverse set of pivot candidates for a given range.
Includes the median, a few values near it, and the two endpoints.
Parameters
----------
range_low
Lower bound of the range (inclusive).
range_high
Upper bound of the range (inclusive).
Returns
-------
candidates
Sorted list of candidate pivot values.
"""
if range_low > range_high:
return []
# Add the median and both endpoints.
median = (range_low + range_high) // 2
candidates = {median, range_low, range_high}
# Add a few offsets around the median.
for offset in [-2, -1, +1, +2]:
shifted = median + offset
if range_low <= shifted <= range_high:
candidates.add(shifted)
return sorted(candidates)
def generate_all_strategies() -> tuple[np.ndarray, list[Strategy]]:
"""
Generate a diverse pool of deterministic BST strategies.
Varies the pivot choice at the root (level 1) and at both children
(level 2), then uses balanced (median) splits for everything below.
Deduplicates strategies that produce identical depth profiles.
Returns
-------
depth_matrix
Shape `(num_strategies, RANGE_SIZE)`.
`depth_matrix[i, x - 1]` is the number of guesses strategy `i` needs to find target `x`.
strategies
Each element is a `Strategy` wrapping a depth lookup for one BST.
"""
print("Generating strategies...")
strategies: list[Strategy] = []
seen_signatures: set[tuple[int, ...]] = set()
# Build BSTs with all possible roots.
_no_pivot: list[int | None] = [None]
for root_pivot in range(1, RANGE_SIZE + 1):
left_pivot_choices = choose_pivot_candidates(1, root_pivot - 1) or _no_pivot
right_pivot_choices = choose_pivot_candidates(root_pivot + 1, RANGE_SIZE) or _no_pivot
# Build all possible left/right subtree combinations for this root.
for left_pivot in left_pivot_choices:
for right_pivot in right_pivot_choices:
depth_lookup: dict[int, int] = {root_pivot: 1}
if left_pivot is not None:
left_subtree_depths = compute_depths_with_chosen_root(
range_low=1,
range_high=root_pivot - 1,
chosen_root=left_pivot,
current_depth=2,
)
depth_lookup.update(left_subtree_depths)
if right_pivot is not None:
right_subtree_depths = compute_depths_with_chosen_root(
range_low=root_pivot + 1,
range_high=RANGE_SIZE,
chosen_root=right_pivot,
current_depth=2,
)
depth_lookup.update(right_subtree_depths)
# Register the new strategy if it wasn't seen before.
strategy = Strategy(depth_lookup)
strategy_signature = strategy.signature()
if strategy_signature not in seen_signatures:
seen_signatures.add(strategy_signature)
strategies.append(strategy)
print(f" {len(strategies)} unique strategies generated")
print()
depth_matrix = np.array(
[
[strategy.depth_for(target) for target in range(1, RANGE_SIZE + 1)]
for strategy in strategies
]
)
return depth_matrix, strategies
def solve_for_optimal_mixture(depth_matrix: np.ndarray) -> tuple[np.ndarray, float]:
"""
Find the optimal randomized strategy via linear programming.
Solves the minimax LP: find a probability distribution over strategies that minimizes
the worst-case expected number of guesses across all possible targets.
Formulation:
minimize worst_case_depth
subject to for each target x:
sum_i probability[i] * depth_matrix[i, x] <= worst_case_depth
sum_i probability[i] = 1
probability[i] >= 0
Parameters
----------
depth_matrix
Shape `(num_strategies, num_targets)`.
`depth_matrix[i, x]` is the number of guesses strategy `i`
requires to find target `x + 1`.
Returns
-------
mixture_probabilities
Optimal probability assigned to each strategy. Sums to 1.
Most entries are zero; only a sparse subset of strategies is active.
minimax_expected_depth
The guaranteed worst-case expected number of guesses under the optimal mixture.
Payoff per game is `PAYOFF_BASELINE - minimax_expected_depth`.
"""
num_strategies, num_targets = depth_matrix.shape
print(f"Solving LP ({num_strategies} strategies x {num_targets} targets)...")
# Decision variables: [prob_0, prob_1, ..., prob_{m-1}, worst_case_depth]
num_variables = num_strategies + 1
# Objective: minimize `worst_case_depth` (the last variable).
objective_coefficients = np.zeros(num_variables)
objective_coefficients[-1] = 1.0
# Inequality constraints (one per target):
# sum_i prob_i * depth[i, x] - worst_case_depth <= 0
inequality_matrix = np.zeros((num_targets, num_variables))
inequality_matrix[:, :num_strategies] = depth_matrix.T
inequality_matrix[:, -1] = -1.0
inequality_bounds = np.zeros(num_targets)
# Equality constraint: sum of probabilities = 1.
equality_matrix = np.zeros((1, num_variables))
equality_matrix[0, :num_strategies] = 1.0
equality_bounds = np.array([1.0])
# Variable bounds: probabilities >= 0, `worst_case_depth` is unbounded.
variable_bounds = [(0, None)] * num_strategies + [(None, None)]
t0 = time.perf_counter()
result = linprog(
objective_coefficients,
A_ub=inequality_matrix,
b_ub=inequality_bounds,
A_eq=equality_matrix,
b_eq=equality_bounds,
bounds=variable_bounds,
method="highs",
)
elapsed = time.perf_counter() - t0
if not result.success:
raise RuntimeError(f"LP solver failed: {result.message}")
mixture_probabilities = result.x[:num_strategies]
minimax_expected_depth: float = result.x[-1]
# Clean up negligible probabilities from floating-point noise.
mixture_probabilities[mixture_probabilities < NEGLIGIBLE_PROBABILITY] = 0
mixture_probabilities /= mixture_probabilities.sum()
num_active_strategies = int(np.sum(mixture_probabilities > 0))
expected_payoff = PAYOFF_BASELINE - minimax_expected_depth
print(f" Solved in {elapsed:.3f}s")
print(f" Minimax expected guesses: {minimax_expected_depth:.6f}")
print(f" Expected payoff: ${expected_payoff:.6f}/game")
print(f" Active trees in mixture: {num_active_strategies}")
return mixture_probabilities, minimax_expected_depth
def analyze_mixture(
depth_matrix: np.ndarray,
mixture_probabilities: np.ndarray,
) -> None:
"""
Print diagnostics for the optimal mixture.
Shows the per-target expected depth profile (worst/best targets) and
the full depth probability distribution for the hardest target.
Parameters
----------
depth_matrix
Depth matrix as returned by `generate_all_strategies`.
mixture_probabilities
Optimal mixture weights as returned by `solve_for_optimal_mixture`.
"""
expected_depth_per_target = mixture_probabilities @ depth_matrix
worst_target_index = int(np.argmax(expected_depth_per_target))
best_target_index = int(np.argmin(expected_depth_per_target))
worst_depth = expected_depth_per_target[worst_target_index]
best_depth = expected_depth_per_target[best_target_index]
print()
print("Expected depth profile:")
print(f" Minimax equalized at: {worst_depth:.6f}")
print(
" Worst target: "
f"x={worst_target_index + 1:<3d} "
f"E[guesses]={worst_depth:.6f} "
f"E[payoff]={PAYOFF_BASELINE - worst_depth:+.4f}"
)
print(
" Best target: "
f"x={best_target_index + 1:<3d} "
f"E[guesses]={best_depth:.6f} "
f"E[payoff]={PAYOFF_BASELINE - best_depth:+.4f}"
)
# Show the depth probability distribution for the hardest target.
active_indices, _ = extract_active_strategy_distribution(mixture_probabilities)
depth_probability: defaultdict[int, float] = defaultdict(float)
for strategy_index in active_indices:
depth = int(depth_matrix[strategy_index, worst_target_index])
depth_probability[depth] += mixture_probabilities[strategy_index]
print()
print(f" Depth distribution for worst target (x={worst_target_index + 1}):")
for depth in sorted(depth_probability):
probability = depth_probability[depth]
payoff = PAYOFF_BASELINE - depth
bar = "#" * int(probability * 60)
print(f" guesses={depth}: {probability * 100:6.2f}% ${payoff:+d} {bar}")
def run_monte_carlo_verification(
depth_matrix: np.ndarray,
mixture_probabilities: np.ndarray,
rng: np.random.Generator,
num_games_for_worst_target: int = 10_000_000,
num_games_per_target: int = 1_000_000,
) -> None:
"""
Verify the LP solution via Monte Carlo simulation.
Simulates games against a committed adversary who always picks the
worst-case target (the one with the highest expected depth under the
mixture). Also sweeps all 100 targets to find the empirically worst.
Parameters
----------
depth_matrix
Depth matrix as returned by `generate_all_strategies`.
mixture_probabilities
Optimal mixture weights as returned by `solve_for_optimal_mixture`.
rng
Numpy random generator instance.
num_games_for_worst_target
The number of games to simulate for the worst target test.
num_games_per_target
The number of games to simulate for the full target sweep test.
"""
expected_depth_per_target = mixture_probabilities @ depth_matrix
worst_target_index = int(np.argmax(expected_depth_per_target))
active_indices, active_probabilities = extract_active_strategy_distribution(
mixture_probabilities
)
print()
print("Monte Carlo verification:")
# 1) Focused test: adversary always picks the theoretical worst target.
sampled_strategies = rng.choice(
active_indices, size=num_games_for_worst_target, p=active_probabilities
)
guesses_per_game = depth_matrix[sampled_strategies, worst_target_index]
payoffs_per_game = PAYOFF_BASELINE - guesses_per_game
mean_payoff = payoffs_per_game.mean()
standard_error = payoffs_per_game.std() / np.sqrt(num_games_for_worst_target)
ci_95 = (mean_payoff - 1.96 * standard_error, mean_payoff + 1.96 * standard_error)
print(
f" Focused test vs theoretical worst target "
f"(x={worst_target_index + 1}), "
f"{num_games_for_worst_target:,} games:"
)
print(f" Mean payoff: ${mean_payoff:.4f} +/- ${standard_error:.4f}")
print(f" 95% CI: [${ci_95[0]:.4f}, ${ci_95[1]:.4f}]")
# 2) Full sweep: simulate against every possible target (common random numbers).
sampled = rng.choice(active_indices, size=num_games_per_target, p=active_probabilities)
empirical_payoffs = (PAYOFF_BASELINE - depth_matrix[sampled, :]).mean(axis=0)
worst_idx = int(np.argmin(empirical_payoffs))
best_idx = int(np.argmax(empirical_payoffs))
print()
print(f" Full sweep across all {RANGE_SIZE} targets ({num_games_per_target:,} games each):")
print(f" Overall mean payoff: ${empirical_payoffs.mean():.4f}")
print(f" Worst target: x={worst_idx + 1:<3d} ${empirical_payoffs[worst_idx]:.4f}/game")
print(f" Best target: x={best_idx + 1:<3d} ${empirical_payoffs[best_idx]:.4f}/game")
def play_interactive_games(
strategy_list: list[Strategy],
mixture_probabilities: np.ndarray,
rng: np.random.Generator,
) -> None:
"""
Run an interactive guessing session using the optimal randomized strategy.
Each round the user enters a target number. A BST is sampled from the
optimal mixture and the program walks it to find the target, printing
each guess along the way.
Parameters
----------
strategy_list
List of `Strategy` objects as returned by `generate_all_strategies`.
mixture_probabilities
Optimal mixture weights as returned by `solve_for_optimal_mixture`.
rng
Numpy random generator instance.
"""
active_indices, active_probabilities = extract_active_strategy_distribution(
mixture_probabilities
)
print()
print("=" * 50)
print(" GUESS THE NUMBER - Optimal Randomized Strategy")
print(f" Enter a number 1-{RANGE_SIZE} (or q to quit)")
print("=" * 50)
cumulative_payoff = 0
games_played = 0
while True:
raw = input(f"\nGame {games_played + 1}: Enter your number> ").strip().lower()
if raw == "q":
print(f"\n {games_played} games played, net ${cumulative_payoff}")
return
try:
target = int(raw)
except ValueError:
print(f" Please enter an integer 1-{RANGE_SIZE} or q")
continue
if target < 1 or target > RANGE_SIZE:
print(f" Out of range — pick 1-{RANGE_SIZE}")
continue
chosen_strategy_index = rng.choice(active_indices, p=active_probabilities)
tree = strategy_list[chosen_strategy_index].to_search_tree()
# Walk the BST toward the target, printing each guess.
node = tree.root
guess_count = 0
while node is not None:
guess_count += 1
if node.value == target:
print(f" #{guess_count}: {node.value}? Correct!")
break
elif target < node.value:
print(f" #{guess_count}: {node.value}? Too high")
node = node.left
else:
print(f" #{guess_count}: {node.value}? Too low")
node = node.right
else:
print(f" Bug: tree does not contain {target}!")
continue
game_payoff = PAYOFF_BASELINE - guess_count
cumulative_payoff += game_payoff
games_played += 1
print(f" Found in {guess_count} guesses! ${game_payoff:+d} (total ${cumulative_payoff})")
def main() -> None:
"""Run the full analysis pipeline and optionally enter interactive mode."""
rng = np.random.default_rng(42)
depth_matrix, strategy_list = generate_all_strategies()
mixture_probabilities, minimax_depth = solve_for_optimal_mixture(depth_matrix)
analyze_mixture(depth_matrix, mixture_probabilities)
run_monte_carlo_verification(depth_matrix, mixture_probabilities, rng)
expected_payoff = PAYOFF_BASELINE - minimax_depth
num_active = int(np.sum(mixture_probabilities > 0))
should_play = expected_payoff > 0
print()
print("=" * 50)
if should_play:
print(" VERDICT: PLAY")
print(f" Guaranteed EV = +${expected_payoff:.4f}/game")
print(f" Mix over {num_active} deterministic BSTs")
print()
print(" Assumptions:")
print(" Adversary must commit to a target before seeing your random pivot choices")
else:
print(" VERDICT: DO NOT PLAY")
print(f" Best achievable EV = ${expected_payoff:.4f}/game")
print(" No profitable strategy exists.")
print("=" * 50)
if "--play" in sys.argv:
play_interactive_games(strategy_list, mixture_probabilities, rng)
if __name__ == "__main__":
main()