Skip to content

Instantly share code, notes, and snippets.

@jemus42
Last active February 3, 2026 16:03
Show Gist options
  • Select an option

  • Save jemus42/af89cf5b07a267dcd5b70951185aa2ea to your computer and use it in GitHub Desktop.

Select an option

Save jemus42/af89cf5b07a267dcd5b70951185aa2ea to your computer and use it in GitHub Desktop.
CPI (Conditional Predictive Impact) with xplainfi, using feature groups
# Reprex: Conditional Feature Importance (CFI) via Gaussian knockoffs with
# {xplainfi}; with ci_method = "cpi" this reproduces CPI (Conditional
# Predictive Impact) inference. Lines starting with `#>` are captured output.
library(xplainfi)
library(mlr3learners)
#> Loading required package: mlr3
# Make sure ranger is installed
# Some data with correlated features
task = sim_dgp_correlated(n = 1000, r = .7)
learner = lrn("regr.ranger", num.trees = 100)
measure = msr("regr.mse")
resampling = rsmp("holdout")
# Create knockoff matrix (Gaussian) from task (uses all data)
knockoff_sampler = KnockoffGaussianSampler$new(task = task)
cfi = CFI$new(
learner = learner,
task = task,
measure = measure,
resampling = resampling,
sampler = knockoff_sampler,
# Could use more repeats for stability but would need to create multiple knockoff matrices in sampler
n_repeats = 1
)
# Fit on the resampling train split(s) and score original vs. knockoff features.
cfi$compute()
# Same as CPI under the hood:
# NOTE(review): conf_upper is Inf in the output below, so this appears to be a
# one-sided test/CI (alternative "greater") — confirm against ?CFI defaults.
cfi$importance(ci_method = "cpi")
#> Key: <feature>
#> feature importance se statistic p.value conf_lower
#> <char> <num> <num> <num> <num> <num>
#> 1: x1 4.3122236075 0.369125526 11.68226879 6.020552e-27 3.703367219
#> 2: x2 -0.0007774337 0.016377543 -0.04746949 5.189162e-01 -0.027791474
#> 3: x3 1.7174102789 0.117239743 14.64870386 3.970628e-38 1.524028457
#> 4: x4 0.0027353962 0.002649429 1.03244751 1.513070e-01 -0.001634721
#> conf_upper
#> <num>
#> 1: Inf
#> 2: Inf
#> 3: Inf
#> 4: Inf
# Observation-wise loss differences
# (loss_post - loss_baseline per test observation; obs_importance is the
# per-observation contribution that the CPI test statistic aggregates)
cfi$obs_loss()
#> feature iter_rsmp iter_repeat row_ids loss_baseline loss_post
#> <char> <int> <int> <int> <num> <num>
#> 1: x1 1 1 2 1.234542e-06 2.636605010
#> 2: x1 1 1 4 2.673268e-03 19.545713486
#> 3: x1 1 1 11 4.150419e-02 0.008964166
#> 4: x1 1 1 16 7.323025e-04 0.826810619
#> 5: x1 1 1 29 1.553946e-02 2.216122152
#> ---
#> 1328: x4 1 1 995 6.389680e-02 0.028051615
#> 1329: x4 1 1 996 1.995682e-02 0.032137676
#> 1330: x4 1 1 998 1.184186e+00 1.371750862
#> 1331: x4 1 1 999 4.278355e-01 0.569745025
#> 1332: x4 1 1 1000 2.227433e-02 0.010902248
#> obs_importance
#> <num>
#> 1: 2.63660377
#> 2: 19.54304022
#> 3: -0.03254002
#> 4: 0.82607832
#> 5: 2.20058269
#> ---
#> 1328: -0.03584518
#> 1329: 0.01218085
#> 1330: 0.18756487
#> 1331: 0.14190949
#> 1332: -0.01137208
# Same but with groups: importance is computed per feature *group* rather than
# per feature (both group members are replaced by knockoffs jointly).
task$feature_names
#> [1] "x1" "x2" "x3" "x4"
# NOTE(review): the group labels look suspect — per the earlier per-feature
# output, x3 has a large, highly significant importance, yet it is placed in
# the "noise" group here. Presumably the labels were meant to describe the DGP;
# confirm against sim_dgp_correlated()'s actual design before relying on them.
groups = list(
correlated = c("x1", "x2"),
noise = c("x3", "x4")
)
cfi = CFI$new(
learner = learner,
task = task,
# Just add the group definition, rest is identical
groups = groups,
measure = measure,
resampling = resampling,
sampler = knockoff_sampler,
n_repeats = 1
)
cfi$compute()
cfi$importance(ci_method = "cpi")
#> Key: <feature>
#> feature importance se statistic p.value conf_lower conf_upper
#> <char> <num> <num> <num> <num> <num> <num>
#> 1: correlated 4.607162 0.3392991 13.57847 5.224602e-34 4.047504 Inf
#> 2: noise 1.709909 0.1280463 13.35383 3.735263e-33 1.498702 Inf
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment