Skip to content

Instantly share code, notes, and snippets.

@rhoit
Created February 16, 2026 03:09
Show Gist options
  • Select an option

  • Save rhoit/c1f1105072e945cb9cc4d26975497bfa to your computer and use it in GitHub Desktop.

Select an option

Save rhoit/c1f1105072e945cb9cc4d26975497bfa to your computer and use it in GitHub Desktop.
#!../venv/bin/python
import json
import math
import collections
import pandas as pd
def entropy(target_col):
    """Return the Shannon entropy, in bits, of the labels in *target_col*.

    Args:
        target_col: Iterable of hashable class labels, e.g. a list or a
            pandas Series. One-shot iterables (generators) are accepted.

    Returns:
        The entropy as a float. An empty input or an input holding a single
        distinct label yields ``0.0``.
    """
    counts = collections.Counter(target_col)
    # Take the total from the tally instead of len() so inputs without a
    # length (iterators, generators) are supported as well.
    total = sum(counts.values())
    # Start from a float so the empty case returns 0.0 like every other case.
    entropy_val = 0.0
    for count in counts.values():
        prob = count / total
        entropy_val -= prob * math.log2(prob)
    return entropy_val
def information_gain(df, attr, target):
    """Return the information gain of splitting *df* on column *attr*.

    The gain is the entropy of the ``target`` column minus the size-weighted
    entropy of ``target`` within each group of rows sharing one ``attr`` value.

    Args:
        df: pandas DataFrame containing both columns.
        attr: Name of the candidate split column.
        target: Name of the class-label column.

    Returns:
        The gain as a number; ``0`` when ``df`` has no rows.
    """
    total_entropy = entropy(df[target])
    row_count = len(df)
    weighted_entropy = 0.0
    # One grouping pass instead of one boolean-mask scan per distinct value.
    # sort=False keeps first-appearance order; dropna=False keeps rows whose
    # attribute is missing as their own group, so the weights always sum to
    # one instead of silently dropping those rows and inflating the gain.
    # observed=True only matters for categorical columns (skips unused
    # categories, which would be empty groups anyway).
    label_groups = df.groupby(attr, sort=False, dropna=False, observed=True)[target]
    for _, labels in label_groups:
        weighted_entropy += (len(labels) / row_count) * entropy(labels)
    return total_entropy - weighted_entropy
def id3_tree(df, target, attributes, default_class=None):
    """Build an ID3 decision tree as nested dictionaries.

    Args:
        df: pandas DataFrame holding the training rows.
        target: Name of the column with the class labels.
        attributes: Names of the columns that may still be split on.
        default_class: Label returned when ``df`` contains no rows.

    Returns:
        A class label for a leaf, otherwise ``{attribute: {value: subtree}}``
        with one entry per distinct value of the selected attribute. Labels
        and values are plain Python scalars where pandas can convert them,
        so the result can be passed to ``json.dumps``.
    """
    # No rows to learn from: fall back to the caller-supplied label.
    if len(df) == 0:
        return default_class

    # tolist() converts numpy scalars to native Python values; .unique()
    # would hand back numpy scalars that json.dumps cannot serialize.
    target_values = df[target].drop_duplicates().tolist()
    if len(target_values) == 1:
        return target_values[0]  # Pure node

    majority_class = collections.Counter(df[target].tolist()).most_common(1)[0][0]
    if len(attributes) == 0:
        return majority_class  # Nothing left to split on

    gains = {attr: information_gain(df, attr, target) for attr in attributes}
    best_attr = max(gains, key=gains.get)
    remaining_attrs = [attr for attr in attributes if attr != best_attr]

    column = df[best_attr]
    branches = {}
    for attr_val in column.drop_duplicates().tolist():
        subset = df[column == attr_val]
        # A subset can be empty (a missing value never equals itself); the
        # branch then becomes a leaf labelled with this node's majority class.
        branches[attr_val] = id3_tree(
            subset, target, remaining_attrs, majority_class
        )
    return {best_attr: branches}
# Load the training rows; the path is resolved against the working directory.
df = pd.read_csv('./data/cricket.csv')

# Candidate split columns; 'play' is the class label to predict.
features = [
    'outlook',
    'temperature',
    'humidity',
    'wind',
]

# Fit the tree and print it as indented JSON.
dtree = id3_tree(df, target='play', attributes=features)
print(json.dumps(dtree, indent=4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment