Last active
February 3, 2026 16:03
-
-
Save jemus42/af89cf5b07a267dcd5b70951185aa2ea to your computer and use it in GitHub Desktop.
CPI with xplainfi with groups
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(xplainfi) | |
| library(mlr3learners) | |
| #> Loading required package: mlr3 | |
| # Make sure ranger is installed | |
| # Some data with correlated features | |
| task = sim_dgp_correlated(n = 1000, r = .7) | |
| learner = lrn("regr.ranger", num.trees = 100) | |
| measure = msr("regr.mse") | |
| resampling = rsmp("holdout") | |
| # Create knockoff matrix (Gaussian) from task (uses all data) | |
| knockoff_sampler = KnockoffGaussianSampler$new(task = task) | |
| cfi = CFI$new( | |
| learner = learner, | |
| task = task, | |
| measure = measure, | |
| resampling = resampling, | |
| sampler = knockoff_sampler, | |
| # Could use more repeats for stability but would need to create multiple knockoff matrices in sampler | |
| n_repeats = 1 | |
| ) | |
| cfi$compute() | |
| # Same as CPI under the hood: | |
| cfi$importance(ci_method = "cpi") | |
| #> Key: <feature> | |
| #> feature importance se statistic p.value conf_lower | |
| #> <char> <num> <num> <num> <num> <num> | |
| #> 1: x1 4.3122236075 0.369125526 11.68226879 6.020552e-27 3.703367219 | |
| #> 2: x2 -0.0007774337 0.016377543 -0.04746949 5.189162e-01 -0.027791474 | |
| #> 3: x3 1.7174102789 0.117239743 14.64870386 3.970628e-38 1.524028457 | |
| #> 4: x4 0.0027353962 0.002649429 1.03244751 1.513070e-01 -0.001634721 | |
| #> conf_upper | |
| #> <num> | |
| #> 1: Inf | |
| #> 2: Inf | |
| #> 3: Inf | |
| #> 4: Inf | |
| # Observation-wise loss differences | |
| cfi$obs_loss() | |
| #> feature iter_rsmp iter_repeat row_ids loss_baseline loss_post | |
| #> <char> <int> <int> <int> <num> <num> | |
| #> 1: x1 1 1 2 1.234542e-06 2.636605010 | |
| #> 2: x1 1 1 4 2.673268e-03 19.545713486 | |
| #> 3: x1 1 1 11 4.150419e-02 0.008964166 | |
| #> 4: x1 1 1 16 7.323025e-04 0.826810619 | |
| #> 5: x1 1 1 29 1.553946e-02 2.216122152 | |
| #> --- | |
| #> 1328: x4 1 1 995 6.389680e-02 0.028051615 | |
| #> 1329: x4 1 1 996 1.995682e-02 0.032137676 | |
| #> 1330: x4 1 1 998 1.184186e+00 1.371750862 | |
| #> 1331: x4 1 1 999 4.278355e-01 0.569745025 | |
| #> 1332: x4 1 1 1000 2.227433e-02 0.010902248 | |
| #> obs_importance | |
| #> <num> | |
| #> 1: 2.63660377 | |
| #> 2: 19.54304022 | |
| #> 3: -0.03254002 | |
| #> 4: 0.82607832 | |
| #> 5: 2.20058269 | |
| #> --- | |
| #> 1328: -0.03584518 | |
| #> 1329: 0.01218085 | |
| #> 1330: 0.18756487 | |
| #> 1331: 0.14190949 | |
| #> 1332: -0.01137208 | |
| # Same but with groups: | |
| task$feature_names | |
| #> [1] "x1" "x2" "x3" "x4" | |
| groups = list( | |
| correlated = c("x1", "x2"), | |
| noise = c("x3", "x4") | |
| ) | |
| cfi = CFI$new( | |
| learner = learner, | |
| task = task, | |
| # Just add the group definition, rest is identical | |
| groups = groups, | |
| measure = measure, | |
| resampling = resampling, | |
| sampler = knockoff_sampler, | |
| n_repeats = 1 | |
| ) | |
| cfi$compute() | |
| cfi$importance(ci_method = "cpi") | |
| #> Key: <feature> | |
| #> feature importance se statistic p.value conf_lower conf_upper | |
| #> <char> <num> <num> <num> <num> <num> <num> | |
| #> 1: correlated 4.607162 0.3392991 13.57847 5.224602e-34 4.047504 Inf | |
| #> 2: noise 1.709909 0.1280463 13.35383 3.735263e-33 1.498702 Inf |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment