tjmahr · February 12, 2026 20:27
diff --git a/marginalize_2_random_effects.R b/marginalize_2_random_effects.R
 library(tidyverse)
 library(brms)
 targets::tar_load(model_wtocs_fast_phase_type_interaction_slopes)
 model <- model_wtocs_fast_phase_type_interaction_slopes
 data_marg <- compute_marginal_means_child_listener_slopes(model)


 # Compute marginal means for the model with random effects:
 #   ~ (control_file_type | subject_num) + (phase | listener)
 compute_marginal_means_child_listener_slopes <- function(
    model,
    newdata = NULL,
    n_subject = 200,
    seed = NULL
 ) {
  # `model_wtocs_fast_phase_type_interaction_slopes` has the formula
  #
  #   n_correct | trials(n_trials) ~
  #     1 + splines::ns(age_48, df = 3) + control_file_type * phase +
  #     (control_file_type | subject_num) +
  #     (phase | listener)
  #
  # where `phase` has two levels (words_1, words_2) and `control_file_type` has
  # two levels (Non-parent, Parent). We are going to marginalize over the random
  # effect variance in the `| subject_num` level and `| listener` units.
  #
  # Notes:
  # 1) in the experiment was each `subject_num` was heard by a `"Parent"`
  # and two `"Non-parent"` listeners. So, 1 simulated `subject_num` need to
  # paired off with 3 different simulated `listeners`.
  # 2) The value n_subject = 200 approximates the sample size of the original
  # dataset so the "estimating marginal means" here is more like a sampling from
  # the posterior predictive distribution of conditional sample means.

  if (!is.null(seed)) withr::local_seed(seed)

  ## Get conditional predictions for a typical child/listener

  if (is.null(newdata)) {
    newdata <- model$data |>
      distinct(phase, control_file_type) |>
      tidyr::crossing(age_48 = c(-12, 0, 12)) |>
      # use "fake-" subject and listener so that the model doesn't use the
      # estimates from a real child
      mutate(
        subject_num = "fake-subject",
        listener = "fake-listener",
        n_trials = 1
      )
  }

  data_linpreds <- newdata |>
    tidybayes::add_linpred_rvars(
      model,
      # prediction for a "typical" subject/listener
      re_formula = NA
    )

  ## Simulate a sample of new subjects/listeners

  # Extract covariance matrices as rvars
  l <- VarCorr(model, summary = FALSE)
  stopifnot(length(l) == 2)

  sigma_l <- l$listener$cov |> posterior::rvar()
  sigma_s <- l$subject_num$cov |> posterior::rvar()
  stopifnot(
    ncol(sigma_l) == 2,
    ncol(sigma_s) == 2,
    colnames(sigma_l) == c("Intercept", "phasewords_2"),
    colnames(sigma_s) == c("Intercept", "control_file_typeParent")
  )

  # Matrix to add 1st column to 2nd column in 2-col matrix (undo dummy coding)
  #   rows %*% mat_undummy == { rows[, 2] <- rows[, 1] + rows[, 2] }
  mat_undummy <- matrix(c(1, 0, 1, 1), nrow = 2)

  # Simulate new children
  new_children <- posterior::rdo(mvtnorm::rmvnorm(n_subject, c(0, 0), sigma_s))

  new_parent_non_non <- (new_children %*% mat_undummy) |>
    # each child has 1 parent listener and 2 nonparent listeners
    _[, c(2, 1, 1)] |>
    # stack the matrix columns
    as.vector()

  control_file_types <- c(
    rep("Parent", n_subject),
    rep("Non-parent", 2 * n_subject)
  )

  # Simulate new listeners
  n_l <- length(control_file_types)
  new_listeners <- posterior::rdo(mvtnorm::rmvnorm(n_l, c(0, 0), sigma_l))
  new_listeners_words1_words_2 <- (new_listeners %*% mat_undummy) |>
    as.vector()

  new_listener_parent_non_non <- c(new_parent_non_non, new_parent_non_non) +
    new_listeners_words1_words_2
  conditions <- c(rep("words_1", n_l), rep("words_2", n_l))

  data_sim_effects <- tibble(
    .adjustment = new_listener_parent_non_non,
    control_file_type = c(control_file_types, control_file_types),
    phase = conditions
  )
  stopifnot(nrow(data_sim_effects) == 6 * n_subject)

  # Create the new participants by adding the typical participant plus the
  # simulated subject_num and listener effects

  data_marginal <- data_linpreds |>
    left_join(
      data_sim_effects,
      by = join_by(phase, control_file_type),
      relationship = "many-to-many"
    ) |>
    mutate(
      .marg_pred = brms::inv_logit_scaled(.linpred + .adjustment),
      .cond_pred = brms::inv_logit_scaled(.linpred)
    ) |>
    group_by(phase, control_file_type, age_48) |>
    # Marginalize over the simulated subjects
    summarise(
      .linear = posterior::rvar_mean(.linpred),
      .adjustment = posterior::rvar_mean(.adjustment),
      .conditional = posterior::rvar_mean(.cond_pred),
      .marginal = posterior::rvar_mean(.marg_pred),
      .groups = "drop"
    )

  # (unnest rvars if saving to tabular format)

  data_marginal
 }
	library(tidyverse)
	library(brms)
	targets::tar_load(model_wtocs_fast_phase_type_interaction_slopes)
	model <- model_wtocs_fast_phase_type_interaction_slopes
	data_marg <- compute_marginal_means_child_listener_slopes(model)


	# Compute marginal means for the model with random effects:
	# ~ (control_file_type \| subject_num) + (phase \| listener)
	compute_marginal_means_child_listener_slopes <- function(
	model,
	newdata = NULL,
	n_subject = 200,
	seed = NULL
	) {
	# `model_wtocs_fast_phase_type_interaction_slopes` has the formula
	#
	# n_correct \| trials(n_trials) ~
	# 1 + splines::ns(age_48, df = 3) + control_file_type * phase +
	# (control_file_type \| subject_num) +
	# (phase \| listener)
	#
	# where `phase` has two levels (words_1, words_2) and `control_file_type` has
	# two levels (Non-parent, Parent). We are going to marginalize over the random
	# effect variance in the `\| subject_num` level and `\| listener` units.
	#
	# Notes:
	# 1) in the experiment was each `subject_num` was heard by a `"Parent"`
	# and two `"Non-parent"` listeners. So, 1 simulated `subject_num` need to
	# paired off with 3 different simulated `listeners`.
	# 2) The value n_subject = 200 approximates the sample size of the original
	# dataset so the "estimating marginal means" here is more like a sampling from
	# the posterior predictive distribution of conditional sample means.

	if (!is.null(seed)) withr::local_seed(seed)

	## Get conditional predictions for a typical child/listener

	if (is.null(newdata)) {
	newdata <- model$data \|>
	distinct(phase, control_file_type) \|>
	tidyr::crossing(age_48 = c(-12, 0, 12)) \|>
	# use "fake-" subject and listener so that the model doesn't use the
	# estimates from a real child
	mutate(
	subject_num = "fake-subject",
	listener = "fake-listener",
	n_trials = 1
	)
	}

	data_linpreds <- newdata \|>
	tidybayes::add_linpred_rvars(
	model,
	# prediction for a "typical" subject/listener
	re_formula = NA
	)

	## Simulate a sample of new subjects/listeners

	# Extract covariance matrices as rvars
	l <- VarCorr(model, summary = FALSE)
	stopifnot(length(l) == 2)

	sigma_l <- l$listener$cov \|> posterior::rvar()
	sigma_s <- l$subject_num$cov \|> posterior::rvar()
	stopifnot(
	ncol(sigma_l) == 2,
	ncol(sigma_s) == 2,
	colnames(sigma_l) == c("Intercept", "phasewords_2"),
	colnames(sigma_s) == c("Intercept", "control_file_typeParent")
	)

	# Matrix to add 1st column to 2nd column in 2-col matrix (undo dummy coding)
	# rows %*% mat_undummy == { rows[, 2] <- rows[, 1] + rows[, 2] }
	mat_undummy <- matrix(c(1, 0, 1, 1), nrow = 2)

	# Simulate new children
	new_children <- posterior::rdo(mvtnorm::rmvnorm(n_subject, c(0, 0), sigma_s))

	new_parent_non_non <- (new_children %*% mat_undummy) \|>
	# each child has 1 parent listener and 2 nonparent listeners
	_[, c(2, 1, 1)] \|>
	# stack the matrix columns
	as.vector()

	control_file_types <- c(
	rep("Parent", n_subject),
	rep("Non-parent", 2 * n_subject)
	)

	# Simulate new listeners
	n_l <- length(control_file_types)
	new_listeners <- posterior::rdo(mvtnorm::rmvnorm(n_l, c(0, 0), sigma_l))
	new_listeners_words1_words_2 <- (new_listeners %*% mat_undummy) \|>
	as.vector()

	new_listener_parent_non_non <- c(new_parent_non_non, new_parent_non_non) +
	new_listeners_words1_words_2
	conditions <- c(rep("words_1", n_l), rep("words_2", n_l))

	data_sim_effects <- tibble(
	.adjustment = new_listener_parent_non_non,
	control_file_type = c(control_file_types, control_file_types),
	phase = conditions
	)
	stopifnot(nrow(data_sim_effects) == 6 * n_subject)

	# Create the new participants by adding the typical participant plus the
	# simulated subject_num and listener effects

	data_marginal <- data_linpreds \|>
	left_join(
	data_sim_effects,
	by = join_by(phase, control_file_type),
	relationship = "many-to-many"
	) \|>
	mutate(
	.marg_pred = brms::inv_logit_scaled(.linpred + .adjustment),
	.cond_pred = brms::inv_logit_scaled(.linpred)
	) \|>
	group_by(phase, control_file_type, age_48) \|>
	# Marginalize over the simulated subjects
	summarise(
	.linear = posterior::rvar_mean(.linpred),
	.adjustment = posterior::rvar_mean(.adjustment),
	.conditional = posterior::rvar_mean(.cond_pred),
	.marginal = posterior::rvar_mean(.marg_pred),
	.groups = "drop"
	)

	# (unnest rvars if saving to tabular format)

	data_marginal
	}
No results found