Skip to content

Instantly share code, notes, and snippets.

@veritem
Last active December 4, 2025 23:14
Show Gist options
  • Select an option

  • Save veritem/37192c52c97d6bc772515504fcd181e7 to your computer and use it in GitHub Desktop.

Select an option

Save veritem/37192c52c97d6bc772515504fcd181e7 to your computer and use it in GitHub Desktop.
problem4-toc
#!/usr/bin/env python3
from collections import defaultdict
class converter:
def __init__(self, G):
self.grammar = G
self.counter = 1
def new_nt(self):
nt = f"U{self.counter}"
self.counter += 1
return nt
def remove_units(self):
units = defaultdict(set)
for A, prods in self.grammar.items():
for p in prods:
if len(p) == 1 and p[0].isupper():
units[A].add(p[0])
changed = True
while changed:
changed = False
for A in list(units):
for B in list(units[A]):
for C in units[B]:
if C not in units[A]:
units[A].add(C)
changed = True
new_grammar = defaultdict(list)
for A, prods in self.grammar.items():
for p in prods:
if not (len(p)==1 and p[0].isupper()):
new_grammar[A].append(p)
for B in units[A]:
for p in self.grammar[B]:
if not (len(p)==1 and p[0].isupper()):
new_grammar[A].append(p)
self.grammar = new_grammar
def replace_terminals(self):
Tmap = {}
new_grammar = defaultdict(list)
for term, prods in self.grammar.items():
for p in prods:
if len(p) > 1:
newp = []
for sym in p:
if not sym.isupper():
if sym not in Tmap:
nt = self.new_nt()
Tmap[sym] = nt
new_grammar[nt].append([sym])
newp.append(Tmap[sym])
else:
newp.append(sym)
new_grammar[term].append(newp)
else:
new_grammar[term].append(p)
for t, nt in Tmap.items():
new_grammar[nt].append([t])
self.grammar = new_grammar
def binarize(self):
new_grammar = defaultdict(list)
for A, prods in self.grammar.items():
for p in prods:
while len(p) > 2:
X = self.new_nt()
new_grammar[X].append(p[1:])
p = [p[0], X]
new_grammar[A].append(p)
self.grammar = new_grammar
def to_cnf(self):
self.remove_units()
self.replace_terminals()
self.binarize()
return dict(self.grammar)
def dedupe(grammar):
clean = {}
for g, prods in grammar.items():
unique = {tuple(p) for p in prods}
clean[g] = [list(p) for p in sorted(unique)]
return clean
grammar = {
"S": [
["(", "S", ")"],
["S", "+", "S"],
["-", "S"],
["p"],
["q"]
]
}
cnf = converter(grammar).to_cnf()
cnf = dedupe(cnf)
print("Chomsky Normal Form:")
for term, prods in cnf.items():
for p in prods:
print(f"{term} → {' '.join(p)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment