Created
February 9, 2026 01:33
-
-
Save rhoit/f3db797e9e4c471228fa5dd363c41cc7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python | |
| import argparse | |
| import itertools | |
| import collections | |
# Demo market-basket data: one inner list per transaction, used as the
# default dataset when the script is run from the command line.
sample = [
    ['Butter', 'Bread', 'Milk'],
    ['Bread', 'Milk'],
    ['Butter', 'Milk'],
    ['Butter', 'Eggs', 'Bread'],
    ['Butter', 'Eggs', 'Bread', 'Milk'],
]
def get_frequent_itemsets(dataset, min_support):
    """Find every itemset whose support reaches ``min_support``.

    The search is level-wise: frequent 1-itemsets are found first, then
    candidate k-itemsets are built by joining pairs of frequent
    (k-1)-itemsets and kept only if enough transactions contain them.

    Args:
        dataset: Iterable of transactions; each transaction is an iterable
            of hashable items.
        min_support: Minimum fraction of transactions that must contain an
            itemset for it to be kept.

    Returns:
        A dict mapping each frequent itemset (a ``frozenset``) to the
        number of transactions that contain it. Empty when ``dataset``
        has no transactions.
    """
    transactions = []
    freq = collections.defaultdict(int)
    for row in dataset:
        # dict.fromkeys de-duplicates while keeping first-seen order, so an
        # item repeated inside one transaction is counted only once.
        items = dict.fromkeys(row)
        transactions.append(set(items))
        for item in items:
            freq[frozenset([item])] += 1

    # Support is relative to the number of transactions, not to the last
    # loop index (which would be one too small).
    n = len(transactions)
    if n == 0:
        return {}

    # Frequent 1-itemsets (L1).
    frequent_itemsets = {
        itemset: count
        for itemset, count in freq.items()
        if count / n >= min_support
    }

    frequent_n_itemsets = dict(frequent_itemsets)
    for k in range(2, len(freq) + 1):
        # Join step: unions of two frequent (k-1)-itemsets that have size k.
        candidates = set()
        for left, right in itertools.combinations(frequent_n_itemsets, 2):
            union = left | right
            if len(union) == k:
                candidates.add(union)

        # Count step: number of transactions containing each candidate.
        counts = {
            candidate: sum(candidate <= row for row in transactions)
            for candidate in candidates
        }

        # Frequent k-itemsets (Lk).
        frequent_n_itemsets = {
            itemset: count
            for itemset, count in counts.items()
            if count / n >= min_support
        }
        if not frequent_n_itemsets:
            # No frequent k-itemsets means no larger ones can exist.
            break
        frequent_itemsets.update(frequent_n_itemsets)

    return frequent_itemsets
def gen_rules(frequent_itemsets, min_confidence, n):
    """Derive association rules ``A -> B`` from frequent itemsets.

    For every itemset, each non-empty proper subset ``A`` is tried as the
    antecedent with ``B = itemset - A`` as the consequent.

    Args:
        frequent_itemsets: Mapping of ``frozenset`` itemset to its
            transaction count. The counts of every antecedent and of every
            reported consequent are looked up in this same mapping.
        min_confidence: Rules with a lower confidence are discarded.
        n: Total number of transactions, used to turn counts into support.

    Returns:
        A list of dicts with the keys ``'rule'``, ``'support'``,
        ``'confidence'`` and ``'lift'``.
    """
    rules = []
    for itemset, joint_count in frequent_itemsets.items():
        if not itemset:
            continue

        # All non-empty proper subsets, smallest first.
        subset_tuples = itertools.chain.from_iterable(
            itertools.combinations(itemset, size)
            for size in range(1, len(itemset))
        )
        for antecedent in map(frozenset, subset_tuples):
            consequent = itemset - antecedent
            # confidence(A -> B) = count(A u B) / count(A)
            confidence = joint_count / frequent_itemsets[antecedent]
            if confidence >= min_confidence:
                # lift = confidence / support(B)
                consequent_support = frequent_itemsets[consequent] / n
                rules.append({
                    'rule': f'{antecedent} -> {set(consequent)}',
                    'support': joint_count / n,
                    'confidence': confidence,
                    'lift': confidence / consequent_support,
                })
    return rules
def main(sysArgs, dataset):
    """Print the frequent itemsets of ``dataset`` and, optionally, its rules.

    Args:
        sysArgs: Object exposing ``min_support`` and ``min_confidence``.
            When ``min_confidence`` is ``None`` the rule table is skipped.
        dataset: Transactions to mine; must support ``len()``.
    """
    print('Frequent Itemsets:')
    frequent_itemsets = get_frequent_itemsets(dataset, sysArgs.min_support)

    itemset_row = '{:3} {:7.4f} {}'
    print('sno', 'Support', 'Itemset')
    for sno, itemset in enumerate(frequent_itemsets):
        support = frequent_itemsets[itemset] / len(dataset)
        print(itemset_row.format(sno, support, itemset))

    # Rules are only produced when a confidence threshold was supplied.
    if sysArgs.min_confidence is None:
        return

    print()
    print('Rule Generation')
    rules = gen_rules(frequent_itemsets, sysArgs.min_confidence, n=len(dataset))

    rule_row = '{:3} {:7.4f} {:8.4f} {:8.4f} {}'
    columns = ('support', 'confidence', 'lift', 'rule')
    print('sno', 'Support', 'Confidence', 'Lift', ' Rule')
    for sno, rule in enumerate(rules, start=1):
        print(rule_row.format(sno, *(rule[column] for column in columns)))
if __name__ == '__main__':
    # Command-line entry point: parse the thresholds and mine the bundled
    # sample transactions.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-s', '--min_support',
        type=float,
        default=0.6,
        help='minimum support value',
    )
    # No default: leaving this out disables rule generation in main().
    parser.add_argument(
        '-c', '--min_confidence',
        type=float,
        help='minimum confidence value',
    )
    cli_args = parser.parse_args()
    main(cli_args, sample)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment