Skip to content

Instantly share code, notes, and snippets.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@glemaitre
glemaitre / TreeSHAP_bug.ipynb
Last active March 29, 2022 09:55
TreeSHAP bug reproducer
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# %%
# Download the original dataset to be able to easily build an index with the
# original datetime.
# The dataset is available at:
# https://archive.ics.uci.edu/ml/machine-learning-databases/00275/Bike-Sharing-Dataset.zip
import pandas as pd
df_external = pd.read_csv(
"~/Downloads/Bike-Sharing-Dataset/hour.csv",
index_col=0,
# %%
from sklearn.datasets import fetch_openml
# Fetch the OpenML dataset registered under id 41082 (presumably the USPS
# digits dataset, going by the variable name -- TODO confirm).
usps = fetch_openml(data_id=41082)
# %%
# Bind the `data` and `target` attributes of the fetched object to short
# module-level names for the cells that follow.
data = usps.data
target = usps.target
# %%
import numpy as np
import pandas as pd
def calcul_chute_tension(
Ib=1, S=1.5, Un=400, L=0.1, metal="cuivre", phi=np.arccos(0.85)
):
Ib = np.asarray(Ib)
S = np.asarray(S)
import pandas as pd
import pytest
def func(expected_columns):
df = pd.DataFrame({
"A": [1, 2, 3],
"B": [1, 2, 3],
"C": [1, 2, 3]
@pytest.mark.parametrize("name, Tree", REG_TREES.items())
@pytest.mark.parametrize("criterion", REG_CRITERIONS)
def test_diabetes_overfit(name, Tree, criterion):
# check consistency of overfitted trees on the diabetes dataset
# since the trees will overfit, we expect an MSE of 0
reg = Tree(criterion=criterion, random_state=0)
reg.fit(diabetes.data, diabetes.target)
score = mean_squared_error(diabetes.target, reg.predict(diabetes.data))
assert score == pytest.approx(0), (
f"Failed with {name}, criterion = {criterion} and score = {score}"
In [1]: import numpy as np
In [2]: X = ["One", "string"]
In [3]: X
Out[3]: ['One', 'string']
In [4]: X[0]
Out[4]: 'One'
# %%
from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate
# Fixed seed value for this snippet; presumably passed as `random_state` to
# the scikit-learn objects imported above -- its uses are not visible here.
RANDOM_SEED = 2
import cv2
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
def grab_frame(cap):
    """Read one frame from ``cap`` and return it converted from BGR to RGB.

    ``cap`` must expose a ``read()`` method returning a pair; it is
    presumably a ``cv2.VideoCapture`` -- confirm against the caller.

    The first element of that pair is discarded without being inspected.
    NOTE(review): if it signals a failed read, the frame is likely empty and
    ``cv2.cvtColor`` would raise -- confirm whether callers rely on that.
    """
    _status, bgr_frame = cap.read()
    rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
    return rgb_frame