mhoehle · December 17, 2025 13:23
diff --git a/secret_santa.R b/secret_santa.R
 ########################################
 # Functions for performing combinatorial
 # computations for secret santa permutations
 #
 # Author: Michael Höhle
 # Date: 2025-12-15
 #
 # (based on Stoch 24/25 code developed 2024-12-20)
 ########################################

 pacman::p_load(tidyverse)
 # Work with large integers
 pacman::p_load(gmp)

 ####################################################
 # Algorithm 1: sequential drawing with redraws
 # if the drawer id draws their own id
 #
 # Params:
 #  n - number of participants
 ####################################################

 seqperm_has_fixpts <- function(n) {
  # names in the hat
  names_left <- 1:n
  # Loop over who is drawing (1:(n-1)). The last person does not need to draw, since its fixed who she gets.
  for (i in 1:(n-1)) {
    #print(names_left)
    # # Repeat until a valid name is drawn
    # repeat {
    #   id <- sample(menge, size=1, replace=FALSE)
    #   if (id != i) break
    # }
    # More efficient version
    valid_candidates <- setdiff(names_left, i)
    if (length(valid_candidates) > 1) {
      # Warning: if x is a vector of length one, then the sampling is interpreted as being from the set 1:x!
      id <- sample(x=valid_candidates, size=1, replace=FALSE)
    } else {
      id <- valid_candidates
    }
    # Remove the name drawn from the hat.
    names_left <- setdiff(names_left, id)
  }
  # fixed point, if the n'th person can only select themself
  #print(names_left)
  #print(names_left == n)
  return(names_left == n)
 }

 set.seed(123)
 replicate(10, seqperm_has_fixpts(n=10))

 # Using the integer sequences
 # Source: https://oeis.org/A136300
 aseq <- readr::read_delim("a136300.txt", col_types = c("ic")) |>
 #aseq <- readr::read_delim("https://oeis.org/A136300/b136300.txt", col_types = c("ic"), col_names=FALSE) |>
  setNames(c("n","a(n)")) |>
  rowwise() |>
  # Numerator: Number of algorithmic sequences with fixed point
  # Denominator: Total number of algorithmic sequences
  mutate(num = list(as.bigz(`a(n)`)),
         denom = list(as.bigz( factorial(n - 1)^2)))
 
 # Compute probabilty for failure of algo
 p_oeis <- function(n) {
  stopifnot( all(n>=1) & (n<=30))
  do_one <- function(n) {
    as.numeric(aseq$num[[n]] / aseq$denom[[n]])
  }
  Vectorize(FUN=do_one)(n)
 }

 p_oeis(n=c(1,4,10,30))


 # Recursion taken from
 # https://possiblywrong.wordpress.com/2011/12/05/secret-santa-revisited/
 # and implemented in R.
 #
 # Description:
 # given n total participants, let m be the number
 # still waiting to draw, and let k \leq m be the number waiting to draw whose names have already been drawn. 
 # Then the probability that the last person to draw is left with his own name is p(n,0),

 p <- function(m,k) {
  #cat("m=", m, "k=", k, "\n")
  if (k<0) return(0)
  if (m==1) if (k<=1) return(1-k) else return(0)
  
  res <- k/m * ((k/m) * p(m-1, k-1) + (1-k/m)*p(m-1,k)) +
    (1-k/m)*( k/(m-1)*p(m-1,k) + (1-k/(m-1))*p(m-1,k+1))
  return(res)
 }

 pFixpoint <- function(n) {
  p(m=n, k=0)
 }

 #################################################
 # Example scenario with n=4 particpants
 #################################################

 n <- 4
 # Simulation (slow)
 mean(replicate(1e5, seqperm_has_fixpts(n)))

 # Simulation (faster: parallel version)
 library(future)
 library(future.apply)
 plan(multisession)   # or multicore on Linux/macOS

 res <- future_replicate(
  1e5,
  seqperm_has_fixpts(n),
  future.seed = TRUE
 )
 mean(res)
 plan(sequential)
 # Exact (own derivation by hand)
 1/18 + 1/12
 # From the integer sequences - https://oeis.org/A136300
 p_oeis(n=4)
 # exact computation
 5 / factorial(n-1)^2
 # using the recursive function
 pFixpoint(n=n)


 ## Check branch probabilities for Algo 1 for n=4.
 ## These are the numbers on p. 14 (Appendix)

 branch_prob4 <- c(# 1 picks 2
  1/3*1/3*1*1,
  1/3*1/3*1/2*1,
  1/3*1/3*1/2*1,
  1/3*1/3*1*1,
  # 1 picks 3
  1/3*1/2*1/2*1,
  1/3*1/2*1/2*1,
  1/3*1/2*1/2*1,
  1/3*1/2*1/2*1,
  # 1 picks 4
  1/3*1/2*1*1,
  1/3*1/2*1/2*1,
  1/3*1/2*1/2*1
 )
 four_gives <- c(3,4,1,3,4,2,2,1,3,2,1)
 sum(branch_prob4)
 c(sum(branch_prob4[four_gives == 1]), 8/36)
 c(sum(branch_prob4[four_gives == 2]), 9/36)
 c(sum(branch_prob4[four_gives == 3]), 14/36)
 c(sum(branch_prob4[four_gives == 4]), 5/36)
 c(8,9,14) / sum(c(8,9,14))


 #################################################
 # Example scenario with n=10 particpants
 # (own calculation not possibly anymore!)
 #################################################

 n <- 10 
 p_oeis(n=n)

 mean(future_replicate(
  1e5,
  seqperm_has_fixpts(n),
  future.seed = TRUE
 ))
 pFixpoint(n=n)

 ##############################################
 # Graph of P(at least one fixed point) as a 
 # function of n
 ##############################################

 df <- tibble(n=2:30, p=p_oeis(n=n), p_random=1/n, p_asymp=1/(n+1))
 ggplot(df, aes(x=n, ymin=0, ymax=p)) + geom_linerange() +
  ylab("P(Permutation has fixed point)") +
  geom_line(aes(y=p_random), col="steelblue") +
  geom_line(aes(y=p_asymp), col="magenta") +
  scale_x_continuous(minor_breaks = 1:30) 
 ggsave(filename="P_fixedpoint.png", width=8, height=4, unit="in")
 # Are always below the probability of a fail
 # from a random assignment.
 all(df$p < df$p_random)
 df$p / df$p_random

 ##################################################
 # algorithm 2: everybody draws a card/name randomly, 
 # only when everybody has drawn will it be checked 
 # if anybody has drawn their own name. 
 # the function is 1 if there are fixpoints and 
 # 0 otherwise.
 #
 # Note:
 # It can be shown that
 # \lim_{n\rightarrow\infty} P(Permutation of the n participants has fixpoints) -> 1-1/e \approx 0.63
 # Thus P(Perm has no fixpoints) -> 1/e
 #################################################

 permutation_has_fixpts <- function(n) {
  set <- 1L:n
  perm <- sample(set, size=n, replace=FALSE)
  return(any(set == perm))
 }


 #
 # pacman::p_load("stat.extend")

 #' Logarithm of the subfactorial numbers
 #' Function: 
 #' Author: Ben O'Neill
 #' 
 #' Fetched from https://github.com/cran/stat.extend/blob/master/R/lsubfactorial.R
 #' \code{lsubfactorial} returns the logarithms of the subfactorial numbers.
 #' See also https://en.wikipedia.org/wiki/Derangement - 
 #' The subfactorial numbers count the number of derangements of a set of objects (permutations in which no element
 #' appears in its original position).  This function computes the logarithms of the subfactorial numbers for a given
 #' input vector specifying the numbers of interest.
 #'
 #' @param x A vector of non-negative integers
 #' @return If the input is a vector of non-negative integers, the output will be a vector of the logarithms of
 #' the corresponding subfactorial numbers.
 #'
 #' @examples 
 #' # In the limit n! / !n goes to e
 #' # so limit of differences of logs is 1
 #' lfactorial(1000) - lsubfactorial(1000)
 #' 
 #' See also https://oeis.org/A000166

 lsubfactorial <- function(x) {
  #Compute values of the subfactorials recursively
  MAX <- max(x, na.rm = TRUE)
  LL  <- rep(0, max(2,MAX+1))
  LL[2] <- -Inf
  if (MAX > 1) {
    for (k in 2:MAX) {
      LL[k+1] <- log(k-1) + matrixStats::logSumExp(c(LL[k], LL[k-1])) } }
  
  #Compute output
  OUT <- rep(0, length(x))
  for (i in seq_along(x)) {
    if (x[i] < 0  || is.na(x[i]) ) {
      OUT[i] <- NA } 
    else {
      OUT[i] <- LL[x[i]+1]}}
  
  #Return output
  OUT 
 }

 # Scenario with n=4
 n <- 4L
 mean(replicate(1e5, permutation_has_fixpts(n)))

 ################################################
 # Check if a permutation x of the elements 1:length(x)
 # has a fixed point.
 #
 # Params:
 #  x - the permutation, p(1) = x[1], ..., p(n)=x[n]
 #
 # Returns:
 #  Bolean, if \exist i \in {1,...,n}, s.t. p(i)=i.
 ################################################

 has_fixed_point <- function(x) {
  any(x == 1:length(x))
 }

 # Generate all possible permutations of 1:n
 # and check for each, if it has at least one fixed point 
 permutations <- arrangements::permutations(n=n) |>
  as_tibble(.name_repair="universal") |>
  rowwise() |>
  mutate(HAS_FIXED_POINT =  has_fixed_point(c_across(everything())))

 # Check if derangements are s.t. the probability is 
 # is equal that i becomes the secret santa of j, j\neq i.
 table(permutations$...1)
 table(permutations$...2)
 table(permutations$...3)
 table(permutations$...4)

 # Make a table
 knitr::kable(t(permutations), row.names = FALSE)
 sum(permutations$HAS_FIXED_POINT)
 length(permutations$HAS_FIXED_POINT)
 mean(permutations$HAS_FIXED_POINT)
 1-1/exp(1)

 # Prob for at least one fixed point in the permuation
 1-exp(lsubfactorial(n) - lfactorial(n))
 1-1/(exp(1))
 exp(lsubfactorial(x=10))
 # For comparison: https://oeis.org/A000166 für n=10: 10 1334961

 # own inefficient implementation of the subfactorial
 subfactorial_bydef <- function(n) {
  k_idx <- 0:n
  factorial(n) * sum((-1)^k_idx / factorial(k_idx))
 }
 1 - subfactorial_bydef(n)/factorial(n)

 # A plot
 df <- tibble(n=2:25L, permutations=factorial(n), dearangements=exp(lsubfactorial(n))) %>% 
  pivot_longer(-n, values_to="nConfig")

 ggplot(df, aes(x = n, y = nConfig, color=name)) +
  geom_line() +
  scale_y_log10() +
  theme(legend.position="bottom")

 # Recursive formula for fail probability of Algo 2.
 # Corresponding in style to the function from the Wordpress blog post (but different sampling)
 #  q(m,k) = \frac{k}{m} \, q(m-1,k-1) + \Big(1-\frac{k}{m}\Big) q(m-1,k)

 p_algo2 <- function(m, k) {
  # invalid states
  if (k < 0 || k > m) return(0)
  
  # no one left: no failure possible
  if (m == 0) return(0)
  
  # one person left
  if (m == 1) return(1 - k)
  
  # recursion
  return(
    (k / m) * p_algo2(m - 1, k - 1) +
      (1 - k / m) * p_algo2(m - 1, k)
  )
 }
 p_algo2(m=10, k=0)
 p_fail_algo2 <- function(n) {
  p_algo2(m=n,k=0)
 }
 p_fail_algo2(n=10)

 ##########################################
 # Number of runs of Algo 2 before a valid 
 # configuration is found.
 ###########################################

 # PMF for the number of attempts - we know
 # the success probability of Algo 2 is 1/e
 tibble(trials=0:(2*n), pmf=dgeom(trials, prob=1/exp(1))) %>% 
  ggplot(aes(x=trials+1, ymin=0, ymax=pmf)) + 
  geom_linerange() +
  xlab("Number of runs")
 runs <- rgeom(1e5, prob=1/exp(1)) + 1
 mean(runs)
 # Expected number of runs of the algorithm 
 1/ (1/exp(1))

 ##########
 ### Asymptotics of Algo 1
 ##########

 # p_n = \prod_{i=1}^{n-1} \left(1 - \frac{1}{n-i}\right).

 p_fail_algo1 <- function(n) {
  i_set <- seq_len(n-2)
  prod(1 - 1/(n-i_set))*1/2
 }
 p_oeis(n=4)
 p_fail_algo1(n=4)
	########################################
	# Functions for performing combinatorial
	# computations for secret santa permutations
	#
	# Author: Michael Höhle
	# Date: 2025-12-15
	#
	# (based on Stoch 24/25 code developed 2024-12-20)
	########################################

	pacman::p_load(tidyverse)
	# Work with large integers
	pacman::p_load(gmp)

	####################################################
	# Algorithm 1: sequential drawing with redraws
	# if the drawer id draws their own id
	#
	# Params:
	# n - number of participants
	####################################################

	seqperm_has_fixpts <- function(n) {
	# names in the hat
	names_left <- 1:n
	# Loop over who is drawing (1:(n-1)). The last person does not need to draw, since its fixed who she gets.
	for (i in 1:(n-1)) {
	#print(names_left)
	# # Repeat until a valid name is drawn
	# repeat {
	# id <- sample(menge, size=1, replace=FALSE)
	# if (id != i) break
	# }
	# More efficient version
	valid_candidates <- setdiff(names_left, i)
	if (length(valid_candidates) > 1) {
	# Warning: if x is a vector of length one, then the sampling is interpreted as being from the set 1:x!
	id <- sample(x=valid_candidates, size=1, replace=FALSE)
	} else {
	id <- valid_candidates
	}
	# Remove the name drawn from the hat.
	names_left <- setdiff(names_left, id)
	}
	# fixed point, if the n'th person can only select themself
	#print(names_left)
	#print(names_left == n)
	return(names_left == n)
	}

	set.seed(123)
	replicate(10, seqperm_has_fixpts(n=10))

	# Using the integer sequences
	# Source: https://oeis.org/A136300
	aseq <- readr::read_delim("a136300.txt", col_types = c("ic")) \|>
	#aseq <- readr::read_delim("https://oeis.org/A136300/b136300.txt", col_types = c("ic"), col_names=FALSE) \|>
	setNames(c("n","a(n)")) \|>
	rowwise() \|>
	# Numerator: Number of algorithmic sequences with fixed point
	# Denominator: Total number of algorithmic sequences
	mutate(num = list(as.bigz(`a(n)`)),
	denom = list(as.bigz( factorial(n - 1)^2)))

	# Compute probabilty for failure of algo
	p_oeis <- function(n) {
	stopifnot( all(n>=1) & (n<=30))
	do_one <- function(n) {
	as.numeric(aseq$num[[n]] / aseq$denom[[n]])
	}
	Vectorize(FUN=do_one)(n)
	}

	p_oeis(n=c(1,4,10,30))


	# Recursion taken from
	# https://possiblywrong.wordpress.com/2011/12/05/secret-santa-revisited/
	# and implemented in R.
	#
	# Description:
	# given n total participants, let m be the number
	# still waiting to draw, and let k \leq m be the number waiting to draw whose names have already been drawn.
	# Then the probability that the last person to draw is left with his own name is p(n,0),

	p <- function(m,k) {
	#cat("m=", m, "k=", k, "\n")
	if (k<0) return(0)
	if (m==1) if (k<=1) return(1-k) else return(0)

	res <- k/m * ((k/m) * p(m-1, k-1) + (1-k/m)*p(m-1,k)) +
	(1-k/m)( k/(m-1)p(m-1,k) + (1-k/(m-1))*p(m-1,k+1))
	return(res)
	}

	pFixpoint <- function(n) {
	p(m=n, k=0)
	}

	#################################################
	# Example scenario with n=4 particpants
	#################################################

	n <- 4
	# Simulation (slow)
	mean(replicate(1e5, seqperm_has_fixpts(n)))

	# Simulation (faster: parallel version)
	library(future)
	library(future.apply)
	plan(multisession) # or multicore on Linux/macOS

	res <- future_replicate(
	1e5,
	seqperm_has_fixpts(n),
	future.seed = TRUE
	)
	mean(res)
	plan(sequential)
	# Exact (own derivation by hand)
	1/18 + 1/12
	# From the integer sequences - https://oeis.org/A136300
	p_oeis(n=4)
	# exact computation
	5 / factorial(n-1)^2
	# using the recursive function
	pFixpoint(n=n)


	## Check branch probabilities for Algo 1 for n=4.
	## These are the numbers on p. 14 (Appendix)

	branch_prob4 <- c(# 1 picks 2
	1/31/31*1,
	1/31/31/2*1,
	1/31/31/2*1,
	1/31/31*1,
	# 1 picks 3
	1/31/21/2*1,
	1/31/21/2*1,
	1/31/21/2*1,
	1/31/21/2*1,
	# 1 picks 4
	1/31/21*1,
	1/31/21/2*1,
	1/31/21/2*1
	)
	four_gives <- c(3,4,1,3,4,2,2,1,3,2,1)
	sum(branch_prob4)
	c(sum(branch_prob4[four_gives == 1]), 8/36)
	c(sum(branch_prob4[four_gives == 2]), 9/36)
	c(sum(branch_prob4[four_gives == 3]), 14/36)
	c(sum(branch_prob4[four_gives == 4]), 5/36)
	c(8,9,14) / sum(c(8,9,14))


	#################################################
	# Example scenario with n=10 particpants
	# (own calculation not possibly anymore!)
	#################################################

	n <- 10
	p_oeis(n=n)

	mean(future_replicate(
	1e5,
	seqperm_has_fixpts(n),
	future.seed = TRUE
	))
	pFixpoint(n=n)

	##############################################
	# Graph of P(at least one fixed point) as a
	# function of n
	##############################################

	df <- tibble(n=2:30, p=p_oeis(n=n), p_random=1/n, p_asymp=1/(n+1))
	ggplot(df, aes(x=n, ymin=0, ymax=p)) + geom_linerange() +
	ylab("P(Permutation has fixed point)") +
	geom_line(aes(y=p_random), col="steelblue") +
	geom_line(aes(y=p_asymp), col="magenta") +
	scale_x_continuous(minor_breaks = 1:30)
	ggsave(filename="P_fixedpoint.png", width=8, height=4, unit="in")
	# Are always below the probability of a fail
	# from a random assignment.
	all(df$p < df$p_random)
	df$p / df$p_random

	##################################################
	# algorithm 2: everybody draws a card/name randomly,
	# only when everybody has drawn will it be checked
	# if anybody has drawn their own name.
	# the function is 1 if there are fixpoints and
	# 0 otherwise.
	#
	# Note:
	# It can be shown that
	# \lim_{n\rightarrow\infty} P(Permutation of the n participants has fixpoints) -> 1-1/e \approx 0.63
	# Thus P(Perm has no fixpoints) -> 1/e
	#################################################

	permutation_has_fixpts <- function(n) {
	set <- 1L:n
	perm <- sample(set, size=n, replace=FALSE)
	return(any(set == perm))
	}


	#
	# pacman::p_load("stat.extend")

	#' Logarithm of the subfactorial numbers
	#' Function:
	#' Author: Ben O'Neill
	#'
	#' Fetched from https://github.com/cran/stat.extend/blob/master/R/lsubfactorial.R
	#' \code{lsubfactorial} returns the logarithms of the subfactorial numbers.
	#' See also https://en.wikipedia.org/wiki/Derangement -
	#' The subfactorial numbers count the number of derangements of a set of objects (permutations in which no element
	#' appears in its original position). This function computes the logarithms of the subfactorial numbers for a given
	#' input vector specifying the numbers of interest.
	#'
	#' @param x A vector of non-negative integers
	#' @return If the input is a vector of non-negative integers, the output will be a vector of the logarithms of
	#' the corresponding subfactorial numbers.
	#'
	#' @examples
	#' # In the limit n! / !n goes to e
	#' # so limit of differences of logs is 1
	#' lfactorial(1000) - lsubfactorial(1000)
	#'
	#' See also https://oeis.org/A000166

	lsubfactorial <- function(x) {
	#Compute values of the subfactorials recursively
	MAX <- max(x, na.rm = TRUE)
	LL <- rep(0, max(2,MAX+1))
	LL[2] <- -Inf
	if (MAX > 1) {
	for (k in 2:MAX) {
	LL[k+1] <- log(k-1) + matrixStats::logSumExp(c(LL[k], LL[k-1])) } }

	#Compute output
	OUT <- rep(0, length(x))
	for (i in seq_along(x)) {
	if (x[i] < 0 \|\| is.na(x[i]) ) {
	OUT[i] <- NA }
	else {
	OUT[i] <- LL[x[i]+1]}}

	#Return output
	OUT
	}

	# Scenario with n=4
	n <- 4L
	mean(replicate(1e5, permutation_has_fixpts(n)))

	################################################
	# Check if a permutation x of the elements 1:length(x)
	# has a fixed point.
	#
	# Params:
	# x - the permutation, p(1) = x[1], ..., p(n)=x[n]
	#
	# Returns:
	# Bolean, if \exist i \in {1,...,n}, s.t. p(i)=i.
	################################################

	has_fixed_point <- function(x) {
	any(x == 1:length(x))
	}

	# Generate all possible permutations of 1:n
	# and check for each, if it has at least one fixed point
	permutations <- arrangements::permutations(n=n) \|>
	as_tibble(.name_repair="universal") \|>
	rowwise() \|>
	mutate(HAS_FIXED_POINT = has_fixed_point(c_across(everything())))

	# Check if derangements are s.t. the probability is
	# is equal that i becomes the secret santa of j, j\neq i.
	table(permutations$...1)
	table(permutations$...2)
	table(permutations$...3)
	table(permutations$...4)

	# Make a table
	knitr::kable(t(permutations), row.names = FALSE)
	sum(permutations$HAS_FIXED_POINT)
	length(permutations$HAS_FIXED_POINT)
	mean(permutations$HAS_FIXED_POINT)
	1-1/exp(1)

	# Prob for at least one fixed point in the permuation
	1-exp(lsubfactorial(n) - lfactorial(n))
	1-1/(exp(1))
	exp(lsubfactorial(x=10))
	# For comparison: https://oeis.org/A000166 für n=10: 10 1334961

	# own inefficient implementation of the subfactorial
	subfactorial_bydef <- function(n) {
	k_idx <- 0:n
	factorial(n) * sum((-1)^k_idx / factorial(k_idx))
	}
	1 - subfactorial_bydef(n)/factorial(n)

	# A plot
	df <- tibble(n=2:25L, permutations=factorial(n), dearangements=exp(lsubfactorial(n))) %>%
	pivot_longer(-n, values_to="nConfig")

	ggplot(df, aes(x = n, y = nConfig, color=name)) +
	geom_line() +
	scale_y_log10() +
	theme(legend.position="bottom")

	# Recursive formula for fail probability of Algo 2.
	# Corresponding in style to the function from the Wordpress blog post (but different sampling)
	# q(m,k) = \frac{k}{m} \, q(m-1,k-1) + \Big(1-\frac{k}{m}\Big) q(m-1,k)

	p_algo2 <- function(m, k) {
	# invalid states
	if (k < 0 \|\| k > m) return(0)

	# no one left: no failure possible
	if (m == 0) return(0)

	# one person left
	if (m == 1) return(1 - k)

	# recursion
	return(
	(k / m) * p_algo2(m - 1, k - 1) +
	(1 - k / m) * p_algo2(m - 1, k)
	)
	}
	p_algo2(m=10, k=0)
	p_fail_algo2 <- function(n) {
	p_algo2(m=n,k=0)
	}
	p_fail_algo2(n=10)

	##########################################
	# Number of runs of Algo 2 before a valid
	# configuration is found.
	###########################################

	# PMF for the number of attempts - we know
	# the success probability of Algo 2 is 1/e
	tibble(trials=0:(2*n), pmf=dgeom(trials, prob=1/exp(1))) %>%
	ggplot(aes(x=trials+1, ymin=0, ymax=pmf)) +
	geom_linerange() +
	xlab("Number of runs")
	runs <- rgeom(1e5, prob=1/exp(1)) + 1
	mean(runs)
	# Expected number of runs of the algorithm
	1/ (1/exp(1))

	##########
	### Asymptotics of Algo 1
	##########

	# p_n = \prod_{i=1}^{n-1} \left(1 - \frac{1}{n-i}\right).

	p_fail_algo1 <- function(n) {
	i_set <- seq_len(n-2)
	prod(1 - 1/(n-i_set))*1/2
	}
	p_oeis(n=4)
	p_fail_algo1(n=4)
No results found