Created
February 16, 2026 03:09
-
-
Save rhoit/c1f1105072e945cb9cc4d26975497bfa to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!../venv/bin/python | |
| import json | |
| import math | |
| import collections | |
| import pandas as pd | |
def entropy(target_col):
    """Return the Shannon entropy, in bits, of the labels in ``target_col``.

    ``target_col`` is any sized iterable of hashable labels (a list or a
    pandas Series both work).  An empty input yields ``0``.
    """
    label_counts = collections.Counter(target_col)
    n_samples = len(target_col)
    # Relative frequency of each distinct label.
    probabilities = [n / n_samples for n in label_counts.values()]

    bits = 0
    for p in probabilities:
        # Every counted label occurs at least once, so p > 0 and log2 is defined.
        bits -= p * math.log2(p)
    return bits
def information_gain(df, attr, target):
    """Return the information gain (in bits) of splitting ``df`` on ``attr``.

    The gain is the entropy of the ``target`` column minus the size-weighted
    average entropy of ``target`` inside each partition induced by the
    distinct values of ``attr``.

    Rows whose ``attr`` value is missing (NaN/None) are counted as one extra
    partition of their own, so the partition weights always sum to 1 and a
    column full of missing values cannot appear maximally informative.

    Args:
        df: pandas DataFrame holding the examples.
        attr: name of the column to split on.
        target: name of the class-label column.

    Returns:
        The gain as a number; ``0`` when ``df`` has no rows.
    """
    total_entropy = entropy(df[target])
    n_rows = len(df)
    column = df[attr]

    weighted_entropy = 0
    for value in column.dropna().unique():
        labels = df.loc[column == value, target]
        weighted_entropy += (len(labels) / n_rows) * entropy(labels)

    # A missing value never compares equal to anything (itself included), so
    # an ``==`` mask cannot select these rows; pick them out with isna().
    missing = column.isna()
    if missing.any():
        labels = df.loc[missing, target]
        weighted_entropy += (len(labels) / n_rows) * entropy(labels)

    return total_entropy - weighted_entropy
def _to_builtin(value):
    """Convert a NumPy scalar to the equivalent built-in Python value."""
    # NumPy scalars expose .item(); plain Python values pass through as-is.
    return value.item() if hasattr(value, "item") else value


def id3_tree(df, target, attributes, default_class=None):
    """Build an ID3 decision tree for ``target`` from the rows of ``df``.

    Args:
        df: pandas DataFrame holding the training examples.
        target: name of the class-label column.
        attributes: column names that may still be used for splitting.
        default_class: value returned when ``df`` has no rows.  Recursive
            calls receive the majority class of the parent node.

    Returns:
        A class label for a leaf, otherwise a nested dict of the form
        ``{attribute: {attribute_value: subtree_or_label, ...}}``.  Labels
        and branch keys taken from the frame are converted to built-in
        Python values so the tree can be passed to ``json.dumps``.

    Rows whose value for the chosen attribute is missing (NaN/None) do not
    get a branch of their own.
    """
    # An empty partition carries no class information; defer to the caller.
    if len(df) == 0:
        return default_class

    target_values = df[target].unique()
    if len(target_values) == 1:
        return _to_builtin(target_values[0])  # Pure node

    majority_class = _to_builtin(
        collections.Counter(df[target]).most_common(1)[0][0]
    )
    # len() rather than truthiness: ``attributes`` may be an array-like.
    if len(attributes) == 0:
        return majority_class

    gains = {attr: information_gain(df, attr, target) for attr in attributes}
    best_attr = max(gains, key=gains.get)
    remaining_attrs = [attr for attr in attributes if attr != best_attr]

    branches = {}
    # dropna(): a missing value never equals itself, so the ``==`` filter
    # below would select an empty subset for it.
    for attr_val in df[best_attr].dropna().unique():
        subset = df[df[best_attr] == attr_val]
        branches[_to_builtin(attr_val)] = id3_tree(
            subset, target, remaining_attrs, majority_class
        )

    # Every value of the chosen attribute was missing: nothing to branch on.
    if not branches:
        return majority_class
    return {best_attr: branches}
# Load the training examples; the path is relative to the working directory.
df = pd.read_csv('./data/cricket.csv')

# Columns the tree may split on; 'play' is the class label to predict.
features = ['outlook', 'temperature', 'humidity', 'wind']
dtree = id3_tree(df, target='play', attributes=features)

# Show the learned tree as indented JSON.
print(json.dumps(dtree, indent=4))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment