Last active
December 17, 2025 13:23
-
-
Save mhoehle/ecf7774e492d2a4d8699948cfb126746 to your computer and use it in GitHub Desktop.
R code to experiment with the combinatorial calculation of secret santa
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| ######################################## | |
| # Functions for performing combinatorial | |
| # computations for secret santa permutations | |
| # | |
| # Author: Michael Höhle | |
| # Date: 2025-12-15 | |
| # | |
| # (based on Stoch 24/25 code developed 2024-12-20) | |
| ######################################## | |
| pacman::p_load(tidyverse) | |
| # Work with large integers | |
| pacman::p_load(gmp) | |
| #################################################### | |
| # Algorithm 1: sequential drawing with redraws | |
| # if the drawer id draws their own id | |
| # | |
| # Params: | |
| # n - number of participants | |
| #################################################### | |
| seqperm_has_fixpts <- function(n) { | |
| # names in the hat | |
| names_left <- 1:n | |
| # Loop over who is drawing (1:(n-1)). The last person does not need to draw, since its fixed who she gets. | |
| for (i in 1:(n-1)) { | |
| #print(names_left) | |
| # # Repeat until a valid name is drawn | |
| # repeat { | |
| # id <- sample(menge, size=1, replace=FALSE) | |
| # if (id != i) break | |
| # } | |
| # More efficient version | |
| valid_candidates <- setdiff(names_left, i) | |
| if (length(valid_candidates) > 1) { | |
| # Warning: if x is a vector of length one, then the sampling is interpreted as being from the set 1:x! | |
| id <- sample(x=valid_candidates, size=1, replace=FALSE) | |
| } else { | |
| id <- valid_candidates | |
| } | |
| # Remove the name drawn from the hat. | |
| names_left <- setdiff(names_left, id) | |
| } | |
| # fixed point, if the n'th person can only select themself | |
| #print(names_left) | |
| #print(names_left == n) | |
| return(names_left == n) | |
| } | |
| set.seed(123) | |
| replicate(10, seqperm_has_fixpts(n=10)) | |
| # Using the integer sequences | |
| # Source: https://oeis.org/A136300 | |
| aseq <- readr::read_delim("a136300.txt", col_types = c("ic")) |> | |
| #aseq <- readr::read_delim("https://oeis.org/A136300/b136300.txt", col_types = c("ic"), col_names=FALSE) |> | |
| setNames(c("n","a(n)")) |> | |
| rowwise() |> | |
| # Numerator: Number of algorithmic sequences with fixed point | |
| # Denominator: Total number of algorithmic sequences | |
| mutate(num = list(as.bigz(`a(n)`)), | |
| denom = list(as.bigz( factorial(n - 1)^2))) | |
| # Compute probabilty for failure of algo | |
| p_oeis <- function(n) { | |
| stopifnot( all(n>=1) & (n<=30)) | |
| do_one <- function(n) { | |
| as.numeric(aseq$num[[n]] / aseq$denom[[n]]) | |
| } | |
| Vectorize(FUN=do_one)(n) | |
| } | |
| p_oeis(n=c(1,4,10,30)) | |
| # Recursion taken from | |
| # https://possiblywrong.wordpress.com/2011/12/05/secret-santa-revisited/ | |
| # and implemented in R. | |
| # | |
| # Description: | |
| # given n total participants, let m be the number | |
| # still waiting to draw, and let k \leq m be the number waiting to draw whose names have already been drawn. | |
| # Then the probability that the last person to draw is left with his own name is p(n,0), | |
| p <- function(m,k) { | |
| #cat("m=", m, "k=", k, "\n") | |
| if (k<0) return(0) | |
| if (m==1) if (k<=1) return(1-k) else return(0) | |
| res <- k/m * ((k/m) * p(m-1, k-1) + (1-k/m)*p(m-1,k)) + | |
| (1-k/m)*( k/(m-1)*p(m-1,k) + (1-k/(m-1))*p(m-1,k+1)) | |
| return(res) | |
| } | |
| pFixpoint <- function(n) { | |
| p(m=n, k=0) | |
| } | |
| ################################################# | |
| # Example scenario with n=4 particpants | |
| ################################################# | |
| n <- 4 | |
| # Simulation (slow) | |
| mean(replicate(1e5, seqperm_has_fixpts(n))) | |
| # Simulation (faster: parallel version) | |
| library(future) | |
| library(future.apply) | |
| plan(multisession) # or multicore on Linux/macOS | |
| res <- future_replicate( | |
| 1e5, | |
| seqperm_has_fixpts(n), | |
| future.seed = TRUE | |
| ) | |
| mean(res) | |
| plan(sequential) | |
| # Exact (own derivation by hand) | |
| 1/18 + 1/12 | |
| # From the integer sequences - https://oeis.org/A136300 | |
| p_oeis(n=4) | |
| # exact computation | |
| 5 / factorial(n-1)^2 | |
| # using the recursive function | |
| pFixpoint(n=n) | |
| ## Check branch probabilities for Algo 1 for n=4. | |
| ## These are the numbers on p. 14 (Appendix) | |
| branch_prob4 <- c(# 1 picks 2 | |
| 1/3*1/3*1*1, | |
| 1/3*1/3*1/2*1, | |
| 1/3*1/3*1/2*1, | |
| 1/3*1/3*1*1, | |
| # 1 picks 3 | |
| 1/3*1/2*1/2*1, | |
| 1/3*1/2*1/2*1, | |
| 1/3*1/2*1/2*1, | |
| 1/3*1/2*1/2*1, | |
| # 1 picks 4 | |
| 1/3*1/2*1*1, | |
| 1/3*1/2*1/2*1, | |
| 1/3*1/2*1/2*1 | |
| ) | |
| four_gives <- c(3,4,1,3,4,2,2,1,3,2,1) | |
| sum(branch_prob4) | |
| c(sum(branch_prob4[four_gives == 1]), 8/36) | |
| c(sum(branch_prob4[four_gives == 2]), 9/36) | |
| c(sum(branch_prob4[four_gives == 3]), 14/36) | |
| c(sum(branch_prob4[four_gives == 4]), 5/36) | |
| c(8,9,14) / sum(c(8,9,14)) | |
| ################################################# | |
| # Example scenario with n=10 particpants | |
| # (own calculation not possibly anymore!) | |
| ################################################# | |
| n <- 10 | |
| p_oeis(n=n) | |
| mean(future_replicate( | |
| 1e5, | |
| seqperm_has_fixpts(n), | |
| future.seed = TRUE | |
| )) | |
| pFixpoint(n=n) | |
| ############################################## | |
| # Graph of P(at least one fixed point) as a | |
| # function of n | |
| ############################################## | |
| df <- tibble(n=2:30, p=p_oeis(n=n), p_random=1/n, p_asymp=1/(n+1)) | |
| ggplot(df, aes(x=n, ymin=0, ymax=p)) + geom_linerange() + | |
| ylab("P(Permutation has fixed point)") + | |
| geom_line(aes(y=p_random), col="steelblue") + | |
| geom_line(aes(y=p_asymp), col="magenta") + | |
| scale_x_continuous(minor_breaks = 1:30) | |
| ggsave(filename="P_fixedpoint.png", width=8, height=4, unit="in") | |
| # Are always below the probability of a fail | |
| # from a random assignment. | |
| all(df$p < df$p_random) | |
| df$p / df$p_random | |
| ################################################## | |
| # algorithm 2: everybody draws a card/name randomly, | |
| # only when everybody has drawn will it be checked | |
| # if anybody has drawn their own name. | |
| # the function is 1 if there are fixpoints and | |
| # 0 otherwise. | |
| # | |
| # Note: | |
| # It can be shown that | |
| # \lim_{n\rightarrow\infty} P(Permutation of the n participants has fixpoints) -> 1-1/e \approx 0.63 | |
| # Thus P(Perm has no fixpoints) -> 1/e | |
| ################################################# | |
| permutation_has_fixpts <- function(n) { | |
| set <- 1L:n | |
| perm <- sample(set, size=n, replace=FALSE) | |
| return(any(set == perm)) | |
| } | |
| # | |
| # pacman::p_load("stat.extend") | |
| #' Logarithm of the subfactorial numbers | |
| #' Function: | |
| #' Author: Ben O'Neill | |
| #' | |
| #' Fetched from https://github.com/cran/stat.extend/blob/master/R/lsubfactorial.R | |
| #' \code{lsubfactorial} returns the logarithms of the subfactorial numbers. | |
| #' See also https://en.wikipedia.org/wiki/Derangement - | |
| #' The subfactorial numbers count the number of derangements of a set of objects (permutations in which no element | |
| #' appears in its original position). This function computes the logarithms of the subfactorial numbers for a given | |
| #' input vector specifying the numbers of interest. | |
| #' | |
| #' @param x A vector of non-negative integers | |
| #' @return If the input is a vector of non-negative integers, the output will be a vector of the logarithms of | |
| #' the corresponding subfactorial numbers. | |
| #' | |
| #' @examples | |
| #' # In the limit n! / !n goes to e | |
| #' # so limit of differences of logs is 1 | |
| #' lfactorial(1000) - lsubfactorial(1000) | |
| #' | |
| #' See also https://oeis.org/A000166 | |
| lsubfactorial <- function(x) { | |
| #Compute values of the subfactorials recursively | |
| MAX <- max(x, na.rm = TRUE) | |
| LL <- rep(0, max(2,MAX+1)) | |
| LL[2] <- -Inf | |
| if (MAX > 1) { | |
| for (k in 2:MAX) { | |
| LL[k+1] <- log(k-1) + matrixStats::logSumExp(c(LL[k], LL[k-1])) } } | |
| #Compute output | |
| OUT <- rep(0, length(x)) | |
| for (i in seq_along(x)) { | |
| if (x[i] < 0 || is.na(x[i]) ) { | |
| OUT[i] <- NA } | |
| else { | |
| OUT[i] <- LL[x[i]+1]}} | |
| #Return output | |
| OUT | |
| } | |
| # Scenario with n=4 | |
| n <- 4L | |
| mean(replicate(1e5, permutation_has_fixpts(n))) | |
| ################################################ | |
| # Check if a permutation x of the elements 1:length(x) | |
| # has a fixed point. | |
| # | |
| # Params: | |
| # x - the permutation, p(1) = x[1], ..., p(n)=x[n] | |
| # | |
| # Returns: | |
| # Bolean, if \exist i \in {1,...,n}, s.t. p(i)=i. | |
| ################################################ | |
| has_fixed_point <- function(x) { | |
| any(x == 1:length(x)) | |
| } | |
| # Generate all possible permutations of 1:n | |
| # and check for each, if it has at least one fixed point | |
| permutations <- arrangements::permutations(n=n) |> | |
| as_tibble(.name_repair="universal") |> | |
| rowwise() |> | |
| mutate(HAS_FIXED_POINT = has_fixed_point(c_across(everything()))) | |
| # Check if derangements are s.t. the probability is | |
| # is equal that i becomes the secret santa of j, j\neq i. | |
| table(permutations$...1) | |
| table(permutations$...2) | |
| table(permutations$...3) | |
| table(permutations$...4) | |
| # Make a table | |
| knitr::kable(t(permutations), row.names = FALSE) | |
| sum(permutations$HAS_FIXED_POINT) | |
| length(permutations$HAS_FIXED_POINT) | |
| mean(permutations$HAS_FIXED_POINT) | |
| 1-1/exp(1) | |
| # Prob for at least one fixed point in the permuation | |
| 1-exp(lsubfactorial(n) - lfactorial(n)) | |
| 1-1/(exp(1)) | |
| exp(lsubfactorial(x=10)) | |
| # For comparison: https://oeis.org/A000166 für n=10: 10 1334961 | |
| # own inefficient implementation of the subfactorial | |
| subfactorial_bydef <- function(n) { | |
| k_idx <- 0:n | |
| factorial(n) * sum((-1)^k_idx / factorial(k_idx)) | |
| } | |
| 1 - subfactorial_bydef(n)/factorial(n) | |
| # A plot | |
| df <- tibble(n=2:25L, permutations=factorial(n), dearangements=exp(lsubfactorial(n))) %>% | |
| pivot_longer(-n, values_to="nConfig") | |
| ggplot(df, aes(x = n, y = nConfig, color=name)) + | |
| geom_line() + | |
| scale_y_log10() + | |
| theme(legend.position="bottom") | |
| # Recursive formula for fail probability of Algo 2. | |
| # Corresponding in style to the function from the Wordpress blog post (but different sampling) | |
| # q(m,k) = \frac{k}{m} \, q(m-1,k-1) + \Big(1-\frac{k}{m}\Big) q(m-1,k) | |
| p_algo2 <- function(m, k) { | |
| # invalid states | |
| if (k < 0 || k > m) return(0) | |
| # no one left: no failure possible | |
| if (m == 0) return(0) | |
| # one person left | |
| if (m == 1) return(1 - k) | |
| # recursion | |
| return( | |
| (k / m) * p_algo2(m - 1, k - 1) + | |
| (1 - k / m) * p_algo2(m - 1, k) | |
| ) | |
| } | |
| p_algo2(m=10, k=0) | |
| p_fail_algo2 <- function(n) { | |
| p_algo2(m=n,k=0) | |
| } | |
| p_fail_algo2(n=10) | |
| ########################################## | |
| # Number of runs of Algo 2 before a valid | |
| # configuration is found. | |
| ########################################### | |
| # PMF for the number of attempts - we know | |
| # the success probability of Algo 2 is 1/e | |
| tibble(trials=0:(2*n), pmf=dgeom(trials, prob=1/exp(1))) %>% | |
| ggplot(aes(x=trials+1, ymin=0, ymax=pmf)) + | |
| geom_linerange() + | |
| xlab("Number of runs") | |
| runs <- rgeom(1e5, prob=1/exp(1)) + 1 | |
| mean(runs) | |
| # Expected number of runs of the algorithm | |
| 1/ (1/exp(1)) | |
| ########## | |
| ### Asymptotics of Algo 1 | |
| ########## | |
| # p_n = \prod_{i=1}^{n-1} \left(1 - \frac{1}{n-i}\right). | |
| p_fail_algo1 <- function(n) { | |
| i_set <- seq_len(n-2) | |
| prod(1 - 1/(n-i_set))*1/2 | |
| } | |
| p_oeis(n=4) | |
| p_fail_algo1(n=4) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment