Skip to content

Instantly share code, notes, and snippets.

@jemus42
Last active February 3, 2026 16:03
Show Gist options
  • Select an option

  • Save jemus42/af89cf5b07a267dcd5b70951185aa2ea to your computer and use it in GitHub Desktop.

Select an option

Save jemus42/af89cf5b07a267dcd5b70951185aa2ea to your computer and use it in GitHub Desktop.
CPI (Conditional Predictive Impact) with xplainfi, using feature groups
# Reprex: Conditional Feature Importance (CFI) via Gaussian knockoffs with
# {xplainfi}; with ci_method = "cpi" this reproduces CPI (Conditional
# Predictive Impact) inference. Lines starting with `#>` are captured output.
library(xplainfi)
library(mlr3learners)
#> Loading required package: mlr3
# Make sure ranger is installed
# Some data with correlated features
task = sim_dgp_correlated(n = 1000, r = .7)
learner = lrn("regr.ranger", num.trees = 100)
measure = msr("regr.mse")
resampling = rsmp("holdout")
# Create knockoff matrix (Gaussian) from task (uses all data)
knockoff_sampler = KnockoffGaussianSampler$new(task = task)
cfi = CFI$new(
learner = learner,
task = task,
measure = measure,
resampling = resampling,
sampler = knockoff_sampler,
# Could use more repeats for stability but would need to create multiple knockoff matrices in sampler
n_repeats = 1
)
# Fit on the resampling train split(s) and score original vs. knockoff features.
cfi$compute()
# Same as CPI under the hood:
# NOTE(review): conf_upper is Inf in the output below, so this appears to be a
# one-sided test/CI (alternative "greater") — confirm against ?CFI defaults.
cfi$importance(ci_method = "cpi")
#> Key: <feature>
#> feature importance se statistic p.value conf_lower
#> <char> <num> <num> <num> <num> <num>
#> 1: x1 4.3122236075 0.369125526 11.68226879 6.020552e-27 3.703367219
#> 2: x2 -0.0007774337 0.016377543 -0.04746949 5.189162e-01 -0.027791474
#> 3: x3 1.7174102789 0.117239743 14.64870386 3.970628e-38 1.524028457
#> 4: x4 0.0027353962 0.002649429 1.03244751 1.513070e-01 -0.001634721
#> conf_upper
#> <num>
#> 1: Inf
#> 2: Inf
#> 3: Inf
#> 4: Inf
# Observation-wise loss differences
# (loss_post - loss_baseline per test observation; obs_importance is the
# per-observation contribution that the CPI test statistic aggregates)
cfi$obs_loss()
#> feature iter_rsmp iter_repeat row_ids loss_baseline loss_post
#> <char> <int> <int> <int> <num> <num>
#> 1: x1 1 1 2 1.234542e-06 2.636605010
#> 2: x1 1 1 4 2.673268e-03 19.545713486
#> 3: x1 1 1 11 4.150419e-02 0.008964166
#> 4: x1 1 1 16 7.323025e-04 0.826810619
#> 5: x1 1 1 29 1.553946e-02 2.216122152
#> ---
#> 1328: x4 1 1 995 6.389680e-02 0.028051615
#> 1329: x4 1 1 996 1.995682e-02 0.032137676
#> 1330: x4 1 1 998 1.184186e+00 1.371750862
#> 1331: x4 1 1 999 4.278355e-01 0.569745025
#> 1332: x4 1 1 1000 2.227433e-02 0.010902248
#> obs_importance
#> <num>
#> 1: 2.63660377
#> 2: 19.54304022
#> 3: -0.03254002
#> 4: 0.82607832
#> 5: 2.20058269
#> ---
#> 1328: -0.03584518
#> 1329: 0.01218085
#> 1330: 0.18756487
#> 1331: 0.14190949
#> 1332: -0.01137208
# Same but with groups: importance is computed per feature *group* rather than
# per feature (both group members are replaced by knockoffs jointly).
task$feature_names
#> [1] "x1" "x2" "x3" "x4"
# NOTE(review): the group labels look suspect — per the earlier per-feature
# output, x3 has a large, highly significant importance, yet it is placed in
# the "noise" group here. Presumably the labels were meant to describe the DGP;
# confirm against sim_dgp_correlated()'s actual design before relying on them.
groups = list(
correlated = c("x1", "x2"),
noise = c("x3", "x4")
)
cfi = CFI$new(
learner = learner,
task = task,
# Just add the group definition, rest is identical
groups = groups,
measure = measure,
resampling = resampling,
sampler = knockoff_sampler,
n_repeats = 1
)
cfi$compute()
cfi$importance(ci_method = "cpi")
#> Key: <feature>
#> feature importance se statistic p.value conf_lower conf_upper
#> <char> <num> <num> <num> <num> <num> <num>
#> 1: correlated 4.607162 0.3392991 13.57847 5.224602e-34 4.047504 Inf
#> 2: noise 1.709909 0.1280463 13.35383 3.735263e-33 1.498702 Inf
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment