Skip to content

Instantly share code, notes, and snippets.

@lmassaron
Created March 19, 2023 12:30
Show Gist options
  • Select an option

  • Save lmassaron/866b15ff65339ca591f6ae6f58b003fc to your computer and use it in GitHub Desktop.

Select an option

Save lmassaron/866b15ff65339ca591f6ae6f58b003fc to your computer and use it in GitHub Desktop.
Theil's U & Cramer's V
def cramers_v(x, y):
    """Return the bias-corrected Cramér's V between two categorical variables.

    The statistic is derived from the chi-squared statistic of the
    contingency table of ``x`` against ``y``, with the small-sample bias
    correction applied to phi-squared and to the table dimensions.

    Args:
        x: Categorical observations accepted by ``pd.crosstab``.
        y: Categorical observations paired element-wise with ``x``.

    Returns:
        The association strength, where 0 means no detectable association.
        Degenerate tables (fewer than two observations, a variable with a
        single category, or a corrected dimension that collapses to zero)
        return ``0.0`` instead of dividing by zero.

    Note:
        ``ss.chi2_contingency`` is called with its default settings, so
        SciPy's continuity correction for 2x2 tables still applies.
    """
    confusion_matrix = pd.crosstab(x, y)
    r, k = confusion_matrix.shape
    n = confusion_matrix.sum().sum()

    # With a single row/column or fewer than two observations the formula
    # below divides by zero; no association can be measured in that case.
    if n < 2 or min(r, k) < 2:
        return 0.0

    chi2 = ss.chi2_contingency(confusion_matrix)[0]
    phi2 = chi2 / n

    # Bias-corrected phi-squared and table dimensions.
    phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / (n - 1))
    rcorr = r - ((r - 1) ** 2) / (n - 1)
    kcorr = k - ((k - 1) ** 2) / (n - 1)

    # The corrected dimension minus one reaches zero when every observation
    # falls in its own category (n == r or n == k), e.g. an ID-like column.
    denominator = min(kcorr - 1, rcorr - 1)
    if denominator <= 0:
        return 0.0

    return np.sqrt(phi2corr / denominator)
def conditional_entropy(x, y):
    """Compute the entropy of ``x`` given ``y`` using natural logarithms.

    Args:
        x: Observations of the variable whose remaining uncertainty is
            measured.
        y: Observations of the conditioning variable, paired with ``x``.

    Returns:
        The sum over observed ``(x, y)`` pairs of
        ``p(x, y) * log(p(y) / p(x, y))``; ``0`` when there are no pairs.
    """
    marginal_counts = Counter(y)
    joint_counts = Counter(zip(x, y))
    n_obs = sum(marginal_counts.values())

    result = 0
    for (_, y_value), joint_count in joint_counts.items():
        joint_prob = joint_count / n_obs
        marginal_prob = marginal_counts[y_value] / n_obs
        result += joint_prob * math.log(marginal_prob / joint_prob)
    return result
def theils_u(x, y):
    """Compute Theil's U (uncertainty coefficient) of ``x`` given ``y``.

    Args:
        x: Observations of the variable being explained.
        y: Observations of the conditioning variable, paired with ``x``.

    Returns:
        ``(S(x) - S(x|y)) / S(x)``, where ``S(x)`` is the entropy of ``x``
        and ``S(x|y)`` is its conditional entropy given ``y``. Returns ``1``
        when ``S(x)`` is zero.
    """
    cond_entropy = conditional_entropy(x, y)

    counts = Counter(x)
    n_obs = sum(counts.values())
    probabilities = [count / n_obs for count in counts.values()]
    marginal_entropy = ss.entropy(probabilities)

    # Zero entropy would make the ratio below undefined.
    if marginal_entropy == 0:
        return 1
    return (marginal_entropy - cond_entropy) / marginal_entropy
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment