Skip to content

Instantly share code, notes, and snippets.

@rhoit
Created February 9, 2026 01:33
Show Gist options
  • Select an option

  • Save rhoit/f3db797e9e4c471228fa5dd363c41cc7 to your computer and use it in GitHub Desktop.

Select an option

Save rhoit/f3db797e9e4c471228fa5dd363c41cc7 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import argparse
import itertools
import collections
# Toy transaction database for the Apriori demo: each inner list is one
# market basket (a single transaction); items are plain strings.
sample = [
['Butter', 'Bread', 'Milk'],
['Bread', 'Milk'],
['Butter', 'Milk'],
['Butter', 'Eggs', 'Bread'],
['Butter', 'Eggs', 'Bread', 'Milk'],
]
def get_frequent_itemsets(dataset, min_support):
    """Find all frequent itemsets in *dataset* using the Apriori algorithm.

    dataset: sequence of transactions, each an iterable of hashable items.
    min_support: minimum fraction (0..1) of transactions an itemset must
        occur in to be considered frequent.

    Returns a dict mapping frozenset(itemset) -> absolute occurrence count.
    """
    n = len(dataset)
    if n == 0:
        return {}  # no transactions -> nothing can be frequent

    # Count individual items (candidate 1-itemsets).
    freq = collections.defaultdict(int)
    for row in dataset:
        for item in row:
            freq[frozenset([item])] += 1

    # Frequent 1-itemsets (L1).
    # BUG FIX: the original divided by the last enumerate() index, i.e.
    # len(dataset) - 1, which inflated every support fraction (and raised
    # ZeroDivisionError for a single-transaction dataset).
    frequent_itemsets = {
        itemset: count for itemset, count in freq.items()
        if count / n >= min_support
    }

    frequent_n_itemsets = frequent_itemsets.copy()
    for k in range(2, len(freq) + 1):
        # Apriori join step: union pairs of frequent (k-1)-itemsets and keep
        # only candidates of exactly size k.
        all_k_itemset = set()
        for c in itertools.combinations(frequent_n_itemsets.keys(), 2):
            k_itemset = frozenset.union(*c)
            if len(k_itemset) == k:
                all_k_itemset.add(k_itemset)

        # Count each candidate's occurrences in a single pass over the data.
        new_freq = collections.defaultdict(int)
        for row in dataset:
            row_set = set(row)
            for k_itemset in all_k_itemset:
                new_freq[k_itemset] += k_itemset.issubset(row_set)

        # Frequent k-itemsets (Lk): prune candidates below minimum support.
        frequent_n_itemsets = {
            itemset: count for itemset, count in new_freq.items()
            if count / n >= min_support
        }
        if len(frequent_n_itemsets) == 0:
            break
        frequent_itemsets.update(frequent_n_itemsets)
    return frequent_itemsets
def gen_rules(frequent_itemsets, min_confidence, n):
    """Derive association rules from the frequent itemsets.

    frequent_itemsets: dict of frozenset -> absolute support count (as
        produced by get_frequent_itemsets); by downward closure every
        subset of a frequent itemset is also a key in this dict.
    min_confidence: minimum confidence (0..1) a rule must reach.
    n: total number of transactions, used to turn counts into fractions.

    Returns a list of dicts with 'rule', 'support', 'confidence', 'lift'.
    """
    rules = []
    for itemset, whole_count in frequent_itemsets.items():
        if len(itemset) < 1:
            continue
        # Every proper non-empty subset A yields a candidate rule
        # A -> (itemset - A).
        for split_size in range(1, len(itemset)):
            for picked in itertools.combinations(itemset, split_size):
                antecedent = frozenset(picked)
                consequent = itemset - antecedent
                # Confidence(A -> B) = Support(A u B) / Support(A),
                # i.e. count(full itemset) / count(antecedent).
                confidence = whole_count / frequent_itemsets[antecedent]
                if confidence < min_confidence:
                    continue
                rules.append({
                    'rule' : f'{antecedent} -> {set(consequent)}',
                    'support' : whole_count / n,
                    'confidence' : confidence,
                    # Lift = confidence / P(consequent).
                    'lift' : confidence / (frequent_itemsets[consequent] / n)
                })
    return rules
def main(sysArgs, dataset):
    """Run the Apriori pipeline on *dataset* and print a report.

    Prints the frequent-itemset table always; prints the rule table only
    when --min_confidence was supplied on the command line.
    """
    total = len(dataset)
    print('Frequent Itemsets:')
    frequent_itemsets = get_frequent_itemsets(dataset, sysArgs.min_support)
    print('sno', 'Support', 'Itemset')
    for idx, (itemset, count) in enumerate(frequent_itemsets.items()):
        print(f'{idx:3} {count / total:7.4f} {itemset}')

    # Rule generation is optional: skipped unless a confidence was given.
    if sysArgs.min_confidence is None:
        return

    print()
    print('Rule Generation')
    rules = gen_rules(frequent_itemsets, sysArgs.min_confidence, n=total)
    print('sno', 'Support', 'Confidence', 'Lift', ' Rule')
    for idx, rule in enumerate(rules, 1):
        row = f"{idx:3} {rule['support']:7.4f} {rule['confidence']:8.4f} {rule['lift']:8.4f} {rule['rule']}"
        print(row)
if __name__ == '__main__':
    # Command-line entry point: parse thresholds, then run on the built-in
    # sample transaction database.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-s', '--min_support',
        type=float,
        default=0.6,
        help='minimum support value',
    )
    parser.add_argument(
        '-c', '--min_confidence',
        type=float,
        help='minimum confidence value',
    )
    main(parser.parse_args(), sample)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment