Skip to content

Instantly share code, notes, and snippets.

@mhoehle
Last active December 17, 2025 13:23
Show Gist options
  • Select an option

  • Save mhoehle/ecf7774e492d2a4d8699948cfb126746 to your computer and use it in GitHub Desktop.

Select an option

Save mhoehle/ecf7774e492d2a4d8699948cfb126746 to your computer and use it in GitHub Desktop.
R code to experiment with the combinatorial calculation of secret santa
########################################
# Functions for performing combinatorial
# computations for secret santa permutations
#
# Author: Michael Höhle
# Date: 2025-12-15
#
# (based on Stoch 24/25 code developed 2024-12-20)
########################################
pacman::p_load(tidyverse)
# Work with large integers
pacman::p_load(gmp)
####################################################
# Algorithm 1: sequential drawing with redraws
# if the drawer id draws their own id
#
# Params:
# n - number of participants
####################################################
seqperm_has_fixpts <- function(n) {
# names in the hat
names_left <- 1:n
# Loop over who is drawing (1:(n-1)). The last person does not need to draw, since its fixed who she gets.
for (i in 1:(n-1)) {
#print(names_left)
# # Repeat until a valid name is drawn
# repeat {
# id <- sample(menge, size=1, replace=FALSE)
# if (id != i) break
# }
# More efficient version
valid_candidates <- setdiff(names_left, i)
if (length(valid_candidates) > 1) {
# Warning: if x is a vector of length one, then the sampling is interpreted as being from the set 1:x!
id <- sample(x=valid_candidates, size=1, replace=FALSE)
} else {
id <- valid_candidates
}
# Remove the name drawn from the hat.
names_left <- setdiff(names_left, id)
}
# fixed point, if the n'th person can only select themself
#print(names_left)
#print(names_left == n)
return(names_left == n)
}
set.seed(123)
replicate(10, seqperm_has_fixpts(n=10))
# Using the integer sequences
# Source: https://oeis.org/A136300
aseq <- readr::read_delim("a136300.txt", col_types = c("ic")) |>
#aseq <- readr::read_delim("https://oeis.org/A136300/b136300.txt", col_types = c("ic"), col_names=FALSE) |>
setNames(c("n","a(n)")) |>
rowwise() |>
# Numerator: Number of algorithmic sequences with fixed point
# Denominator: Total number of algorithmic sequences
mutate(num = list(as.bigz(`a(n)`)),
denom = list(as.bigz( factorial(n - 1)^2)))
# Compute probabilty for failure of algo
p_oeis <- function(n) {
stopifnot( all(n>=1) & (n<=30))
do_one <- function(n) {
as.numeric(aseq$num[[n]] / aseq$denom[[n]])
}
Vectorize(FUN=do_one)(n)
}
p_oeis(n=c(1,4,10,30))
# Recursion taken from
# https://possiblywrong.wordpress.com/2011/12/05/secret-santa-revisited/
# and implemented in R.
#
# Description:
# given n total participants, let m be the number
# still waiting to draw, and let k \leq m be the number waiting to draw whose names have already been drawn.
# Then the probability that the last person to draw is left with his own name is p(n,0),
p <- function(m,k) {
#cat("m=", m, "k=", k, "\n")
if (k<0) return(0)
if (m==1) if (k<=1) return(1-k) else return(0)
res <- k/m * ((k/m) * p(m-1, k-1) + (1-k/m)*p(m-1,k)) +
(1-k/m)*( k/(m-1)*p(m-1,k) + (1-k/(m-1))*p(m-1,k+1))
return(res)
}
pFixpoint <- function(n) {
p(m=n, k=0)
}
#################################################
# Example scenario with n=4 particpants
#################################################
n <- 4
# Simulation (slow)
mean(replicate(1e5, seqperm_has_fixpts(n)))
# Simulation (faster: parallel version)
library(future)
library(future.apply)
plan(multisession) # or multicore on Linux/macOS
res <- future_replicate(
1e5,
seqperm_has_fixpts(n),
future.seed = TRUE
)
mean(res)
plan(sequential)
# Exact (own derivation by hand)
1/18 + 1/12
# From the integer sequences - https://oeis.org/A136300
p_oeis(n=4)
# exact computation
5 / factorial(n-1)^2
# using the recursive function
pFixpoint(n=n)
## Check branch probabilities for Algo 1 for n=4.
## These are the numbers on p. 14 (Appendix)
branch_prob4 <- c(# 1 picks 2
1/3*1/3*1*1,
1/3*1/3*1/2*1,
1/3*1/3*1/2*1,
1/3*1/3*1*1,
# 1 picks 3
1/3*1/2*1/2*1,
1/3*1/2*1/2*1,
1/3*1/2*1/2*1,
1/3*1/2*1/2*1,
# 1 picks 4
1/3*1/2*1*1,
1/3*1/2*1/2*1,
1/3*1/2*1/2*1
)
four_gives <- c(3,4,1,3,4,2,2,1,3,2,1)
sum(branch_prob4)
c(sum(branch_prob4[four_gives == 1]), 8/36)
c(sum(branch_prob4[four_gives == 2]), 9/36)
c(sum(branch_prob4[four_gives == 3]), 14/36)
c(sum(branch_prob4[four_gives == 4]), 5/36)
c(8,9,14) / sum(c(8,9,14))
#################################################
# Example scenario with n=10 particpants
# (own calculation not possibly anymore!)
#################################################
n <- 10
p_oeis(n=n)
mean(future_replicate(
1e5,
seqperm_has_fixpts(n),
future.seed = TRUE
))
pFixpoint(n=n)
##############################################
# Graph of P(at least one fixed point) as a
# function of n
##############################################
df <- tibble(n=2:30, p=p_oeis(n=n), p_random=1/n, p_asymp=1/(n+1))
ggplot(df, aes(x=n, ymin=0, ymax=p)) + geom_linerange() +
ylab("P(Permutation has fixed point)") +
geom_line(aes(y=p_random), col="steelblue") +
geom_line(aes(y=p_asymp), col="magenta") +
scale_x_continuous(minor_breaks = 1:30)
ggsave(filename="P_fixedpoint.png", width=8, height=4, unit="in")
# Are always below the probability of a fail
# from a random assignment.
all(df$p < df$p_random)
df$p / df$p_random
##################################################
# algorithm 2: everybody draws a card/name randomly,
# only when everybody has drawn will it be checked
# if anybody has drawn their own name.
# the function is 1 if there are fixpoints and
# 0 otherwise.
#
# Note:
# It can be shown that
# \lim_{n\rightarrow\infty} P(Permutation of the n participants has fixpoints) -> 1-1/e \approx 0.63
# Thus P(Perm has no fixpoints) -> 1/e
#################################################
permutation_has_fixpts <- function(n) {
set <- 1L:n
perm <- sample(set, size=n, replace=FALSE)
return(any(set == perm))
}
#
# pacman::p_load("stat.extend")
#' Logarithm of the subfactorial numbers
#' Function:
#' Author: Ben O'Neill
#'
#' Fetched from https://github.com/cran/stat.extend/blob/master/R/lsubfactorial.R
#' \code{lsubfactorial} returns the logarithms of the subfactorial numbers.
#' See also https://en.wikipedia.org/wiki/Derangement -
#' The subfactorial numbers count the number of derangements of a set of objects (permutations in which no element
#' appears in its original position). This function computes the logarithms of the subfactorial numbers for a given
#' input vector specifying the numbers of interest.
#'
#' @param x A vector of non-negative integers
#' @return If the input is a vector of non-negative integers, the output will be a vector of the logarithms of
#' the corresponding subfactorial numbers.
#'
#' @examples
#' # In the limit n! / !n goes to e
#' # so limit of differences of logs is 1
#' lfactorial(1000) - lsubfactorial(1000)
#'
#' See also https://oeis.org/A000166
lsubfactorial <- function(x) {
#Compute values of the subfactorials recursively
MAX <- max(x, na.rm = TRUE)
LL <- rep(0, max(2,MAX+1))
LL[2] <- -Inf
if (MAX > 1) {
for (k in 2:MAX) {
LL[k+1] <- log(k-1) + matrixStats::logSumExp(c(LL[k], LL[k-1])) } }
#Compute output
OUT <- rep(0, length(x))
for (i in seq_along(x)) {
if (x[i] < 0 || is.na(x[i]) ) {
OUT[i] <- NA }
else {
OUT[i] <- LL[x[i]+1]}}
#Return output
OUT
}
# Scenario with n=4
n <- 4L
mean(replicate(1e5, permutation_has_fixpts(n)))
################################################
# Check if a permutation x of the elements 1:length(x)
# has a fixed point.
#
# Params:
# x - the permutation, p(1) = x[1], ..., p(n)=x[n]
#
# Returns:
# Bolean, if \exist i \in {1,...,n}, s.t. p(i)=i.
################################################
has_fixed_point <- function(x) {
any(x == 1:length(x))
}
# Generate all possible permutations of 1:n
# and check for each, if it has at least one fixed point
permutations <- arrangements::permutations(n=n) |>
as_tibble(.name_repair="universal") |>
rowwise() |>
mutate(HAS_FIXED_POINT = has_fixed_point(c_across(everything())))
# Check if derangements are s.t. the probability is
# is equal that i becomes the secret santa of j, j\neq i.
table(permutations$...1)
table(permutations$...2)
table(permutations$...3)
table(permutations$...4)
# Make a table
knitr::kable(t(permutations), row.names = FALSE)
sum(permutations$HAS_FIXED_POINT)
length(permutations$HAS_FIXED_POINT)
mean(permutations$HAS_FIXED_POINT)
1-1/exp(1)
# Prob for at least one fixed point in the permuation
1-exp(lsubfactorial(n) - lfactorial(n))
1-1/(exp(1))
exp(lsubfactorial(x=10))
# For comparison: https://oeis.org/A000166 für n=10: 10 1334961
# own inefficient implementation of the subfactorial
subfactorial_bydef <- function(n) {
k_idx <- 0:n
factorial(n) * sum((-1)^k_idx / factorial(k_idx))
}
1 - subfactorial_bydef(n)/factorial(n)
# A plot
df <- tibble(n=2:25L, permutations=factorial(n), dearangements=exp(lsubfactorial(n))) %>%
pivot_longer(-n, values_to="nConfig")
ggplot(df, aes(x = n, y = nConfig, color=name)) +
geom_line() +
scale_y_log10() +
theme(legend.position="bottom")
# Recursive formula for fail probability of Algo 2.
# Corresponding in style to the function from the Wordpress blog post (but different sampling)
# q(m,k) = \frac{k}{m} \, q(m-1,k-1) + \Big(1-\frac{k}{m}\Big) q(m-1,k)
p_algo2 <- function(m, k) {
# invalid states
if (k < 0 || k > m) return(0)
# no one left: no failure possible
if (m == 0) return(0)
# one person left
if (m == 1) return(1 - k)
# recursion
return(
(k / m) * p_algo2(m - 1, k - 1) +
(1 - k / m) * p_algo2(m - 1, k)
)
}
p_algo2(m=10, k=0)
p_fail_algo2 <- function(n) {
p_algo2(m=n,k=0)
}
p_fail_algo2(n=10)
##########################################
# Number of runs of Algo 2 before a valid
# configuration is found.
###########################################
# PMF for the number of attempts - we know
# the success probability of Algo 2 is 1/e
tibble(trials=0:(2*n), pmf=dgeom(trials, prob=1/exp(1))) %>%
ggplot(aes(x=trials+1, ymin=0, ymax=pmf)) +
geom_linerange() +
xlab("Number of runs")
runs <- rgeom(1e5, prob=1/exp(1)) + 1
mean(runs)
# Expected number of runs of the algorithm
1/ (1/exp(1))
##########
### Asymptotics of Algo 1
##########
# p_n = \prod_{i=1}^{n-1} \left(1 - \frac{1}{n-i}\right).
p_fail_algo1 <- function(n) {
i_set <- seq_len(n-2)
prod(1 - 1/(n-i_set))*1/2
}
p_oeis(n=4)
p_fail_algo1(n=4)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment