Skip to content

Instantly share code, notes, and snippets.

@tjmahr
Created February 12, 2026 20:27
Show Gist options
  • Select an option

  • Save tjmahr/4d7823dc960d0a3a195125b094ec4bf3 to your computer and use it in GitHub Desktop.

Select an option

Save tjmahr/4d7823dc960d0a3a195125b094ec4bf3 to your computer and use it in GitHub Desktop.
Marginal means for `~ (control_file_type | subject_num) + (phase | listener)`
library(tidyverse)
library(brms)
targets::tar_load(model_wtocs_fast_phase_type_interaction_slopes)
model <- model_wtocs_fast_phase_type_interaction_slopes
data_marg <- compute_marginal_means_child_listener_slopes(model)
# Compute marginal means for the model with random effects:
# ~ (control_file_type | subject_num) + (phase | listener)
compute_marginal_means_child_listener_slopes <- function(
model,
newdata = NULL,
n_subject = 200,
seed = NULL
) {
# `model_wtocs_fast_phase_type_interaction_slopes` has the formula
#
# n_correct | trials(n_trials) ~
# 1 + splines::ns(age_48, df = 3) + control_file_type * phase +
# (control_file_type | subject_num) +
# (phase | listener)
#
# where `phase` has two levels (words_1, words_2) and `control_file_type` has
# two levels (Non-parent, Parent). We are going to marginalize over the random
# effect variance in the `| subject_num` level and `| listener` units.
#
# Notes:
# 1) in the experiment was each `subject_num` was heard by a `"Parent"`
# and two `"Non-parent"` listeners. So, 1 simulated `subject_num` need to
# paired off with 3 different simulated `listeners`.
# 2) The value n_subject = 200 approximates the sample size of the original
# dataset so the "estimating marginal means" here is more like a sampling from
# the posterior predictive distribution of conditional sample means.
if (!is.null(seed)) withr::local_seed(seed)
## Get conditional predictions for a typical child/listener
if (is.null(newdata)) {
newdata <- model$data |>
distinct(phase, control_file_type) |>
tidyr::crossing(age_48 = c(-12, 0, 12)) |>
# use "fake-" subject and listener so that the model doesn't use the
# estimates from a real child
mutate(
subject_num = "fake-subject",
listener = "fake-listener",
n_trials = 1
)
}
data_linpreds <- newdata |>
tidybayes::add_linpred_rvars(
model,
# prediction for a "typical" subject/listener
re_formula = NA
)
## Simulate a sample of new subjects/listeners
# Extract covariance matrices as rvars
l <- VarCorr(model, summary = FALSE)
stopifnot(length(l) == 2)
sigma_l <- l$listener$cov |> posterior::rvar()
sigma_s <- l$subject_num$cov |> posterior::rvar()
stopifnot(
ncol(sigma_l) == 2,
ncol(sigma_s) == 2,
colnames(sigma_l) == c("Intercept", "phasewords_2"),
colnames(sigma_s) == c("Intercept", "control_file_typeParent")
)
# Matrix to add 1st column to 2nd column in 2-col matrix (undo dummy coding)
# rows %*% mat_undummy == { rows[, 2] <- rows[, 1] + rows[, 2] }
mat_undummy <- matrix(c(1, 0, 1, 1), nrow = 2)
# Simulate new children
new_children <- posterior::rdo(mvtnorm::rmvnorm(n_subject, c(0, 0), sigma_s))
new_parent_non_non <- (new_children %*% mat_undummy) |>
# each child has 1 parent listener and 2 nonparent listeners
_[, c(2, 1, 1)] |>
# stack the matrix columns
as.vector()
control_file_types <- c(
rep("Parent", n_subject),
rep("Non-parent", 2 * n_subject)
)
# Simulate new listeners
n_l <- length(control_file_types)
new_listeners <- posterior::rdo(mvtnorm::rmvnorm(n_l, c(0, 0), sigma_l))
new_listeners_words1_words_2 <- (new_listeners %*% mat_undummy) |>
as.vector()
new_listener_parent_non_non <- c(new_parent_non_non, new_parent_non_non) +
new_listeners_words1_words_2
conditions <- c(rep("words_1", n_l), rep("words_2", n_l))
data_sim_effects <- tibble(
.adjustment = new_listener_parent_non_non,
control_file_type = c(control_file_types, control_file_types),
phase = conditions
)
stopifnot(nrow(data_sim_effects) == 6 * n_subject)
# Create the new participants by adding the typical participant plus the
# simulated subject_num and listener effects
data_marginal <- data_linpreds |>
left_join(
data_sim_effects,
by = join_by(phase, control_file_type),
relationship = "many-to-many"
) |>
mutate(
.marg_pred = brms::inv_logit_scaled(.linpred + .adjustment),
.cond_pred = brms::inv_logit_scaled(.linpred)
) |>
group_by(phase, control_file_type, age_48) |>
# Marginalize over the simulated subjects
summarise(
.linear = posterior::rvar_mean(.linpred),
.adjustment = posterior::rvar_mean(.adjustment),
.conditional = posterior::rvar_mean(.cond_pred),
.marginal = posterior::rvar_mean(.marg_pred),
.groups = "drop"
)
# (unnest rvars if saving to tabular format)
data_marginal
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment