YangSiJun528 · January 29, 2026 03:33
diff --git a/AI.md b/AI.md
diff --git a/NFA.py b/NFA.py
 # 원본 링크: https://swtch.com/~rsc/regexp/nfa.c.txt
 # 원본 C코드는 이해하기 어려워서 Python으로 변환한 코드

 # =========================
 # constants
 # =========================
 # Sentinel codes stored in State.char_code for the two non-literal states.
 # Literal states store ord(ch), and a Python str character may be any code
 # point up to 0x10FFFF, so the sentinels must lie above that range.  The
 # former values 256/257 were equal to ord('\u0100') and ord('\u0101').
 STATE_MATCH = 0x110000  # accepting state
 STATE_SPLIT = 0x110001  # epsilon-transition fork with two outgoing edges


 # =========================
 # NFA state
 # =========================
 class State:
    """A single NFA state.

    Attributes:
        char_code: ord() of the literal this state consumes, or one of the
            STATE_MATCH / STATE_SPLIT sentinels.
        out_edge: Successor state, or None while not connected.
        out_edge1: Second successor, followed only for STATE_SPLIT states.
        lastlist: Value of the global list_id when this state was last put
            on a state list; add_state uses it to skip duplicates.
    """

    def __init__(
        self,
        char_code: int,
        out_edge: 'State | None' = None,
        out_edge1: 'State | None' = None,
    ) -> None:
        self.char_code = char_code
        self.out_edge = out_edge
        self.out_edge1 = out_edge1
        # 0 never equals a live list_id: nfa_match increments list_id
        # before it builds its first state list.
        self.lastlist = 0


 # =========================
 # fragment (build-time only)
 # =========================
 class Fragment:
    """Partially built NFA piece; lives only on the postfix_to_nfa stack.

    Attributes:
        start: Entry state of this fragment.
        out_states: States whose outgoing edge is not connected yet.
    """

    def __init__(self, start: 'State', out_states: list['State']):
        self.out_states = out_states
        self.start = start


 # =========================
 # infix -> postfix
 # =========================
 def re2post(pattern: str) -> str | None:
    """
    정규식을 postfix로 변환. 암묵적 연결을 '.'로 명시화.
    예: "ab" -> "ab.", "a|b" -> "ab|", "a*b" -> "a*b."
    실패 시 None 반환.
    """
    output: list[str] = []
    stack: list[tuple[int, int]] = []  # 괄호 진입 시 (nalt, natom) 저장

    nalt = 0   # 현재 레벨의 | 개수
    natom = 0  # 현재 레벨의 피연산자 개수

    for ch in pattern:
        match ch:
            case '(':
                # 연결 보류, 현재 상태 저장 후 새 레벨 시작
                if natom > 1:
                    natom -= 1
                    output.append('.')
                stack.append((nalt, natom))
                nalt = natom = 0

            case '|':
                # 지금까지의 원자들 연결 후 alternation 카운트
                if natom == 0:
                    return None
                while natom > 1:
                    natom -= 1
                    output.append('.')
                nalt += 1
                natom = 0

            case ')':
                # 괄호 내부 마무리: 연결 → alternation → 이전 레벨 복원
                if not stack or natom == 0:
                    return None
                while natom > 1:
                    natom -= 1
                    output.append('.')
                while nalt > 0:
                    output.append('|')
                    nalt -= 1
                nalt, natom = stack.pop()
                natom += 1  # 괄호 전체가 하나의 원자

            case '*':
                # 단항 연산자: 바로 출력
                if natom == 0:
                    return None
                output.append('*')

            case _:
                # 리터럴: 이전 원자와 연결 필요하면 '.' 추가
                if natom > 1:
                    natom -= 1
                    output.append('.')
                output.append(ch)
                natom += 1

    if stack:  # 괄호 안 닫힘
        return None

    # 남은 연결과 alternation 처리
    while natom > 1:
        natom -= 1
        output.append('.')
    while nalt > 0:
        output.append('|')
        nalt -= 1

    return ''.join(output)


 # =========================
 # postfix -> NFA (Thompson)
 # =========================
 def postfix_to_nfa(postfix: str) -> State:
    """
    postfix 정규식을 NFA 그래프로 변환 (Thompson 구성법).
    스택에 Fragment 쌓으면서 연산자마다 조립.
    최종 Fragment를 MATCH 상태에 연결 후 시작 상태 반환.
    """
    stack: list[Fragment] = []

    for ch in postfix:
        match ch:
            case '.':  # 연결: frag1 뒤에 frag2 붙임
                frag2 = stack.pop()
                frag1 = stack.pop()
                for s in frag1.out_states:
                    s.out_edge = frag2.start
                stack.append(Fragment(frag1.start, frag2.out_states))

            case '|':  # 선택: SPLIT에서 양쪽으로 분기
                frag2 = stack.pop()
                frag1 = stack.pop()
                s = State(STATE_SPLIT, frag1.start, frag2.start)
                stack.append(Fragment(s, frag1.out_states + frag2.out_states))

            case '*':  # 반복: SPLIT → frag → 다시 SPLIT (루프)
                frag = stack.pop()
                s = State(STATE_SPLIT, frag.start, None)  # out1은 탈출용
                for st in frag.out_states:
                    st.out_edge = s  # frag 끝에서 SPLIT으로 복귀
                stack.append(Fragment(s, [s]))  # s.out_edge1이 매달린 엣지

            case _:  # 리터럴: 단일 상태 생성
                s = State(ord(ch))
                stack.append(Fragment(s, [s]))

    # 최종 조립: 매달린 엣지들을 MATCH에 연결
    frag = stack.pop()
    match_state = State(STATE_MATCH)
    for s in frag.out_states:
        s.out_edge = match_state

    return frag.start


 # =========================
 # NFA execution (list + id)
 # =========================
 list_id = 0  # Global generation counter; nfa_match increments it before building each state list.


 def add_state(states: list[State], s: State | None) -> None:
    """Append *s* and its epsilon-closure to *states*.

    STATE_SPLIT states are expanded through both of their edges.  A state
    whose lastlist already equals the global list_id is skipped, so each
    state is appended at most once per generation and cycles terminate.

    The traversal uses an explicit stack instead of recursion so that long
    chains of SPLIT states cannot exceed the interpreter recursion limit.

    Args:
        states: List being built; extended in place.
        s: State to add; None is ignored.
    """
    pending = [s]
    while pending:
        state = pending.pop()
        if state is None or state.lastlist == list_id:
            continue  # missing edge, or already added in this generation

        state.lastlist = list_id
        states.append(state)

        if state.char_code == STATE_SPLIT:
            # LIFO order: push out_edge1 first so that out_edge and
            # everything below it is expanded before out_edge1.
            pending.append(state.out_edge1)
            pending.append(state.out_edge)


 def nfa_match(start: State, text: str) -> bool:
    """Simulate the NFA over *text* and report whether it accepts.

    The set of active states is advanced once per input character; the text
    is accepted when a STATE_MATCH state is active after the last character.

    Args:
        start: Start state of the NFA.
        text: Input to match in full.

    Returns:
        True if the final active set contains a STATE_MATCH state.
    """
    global list_id

    # Initial active set: epsilon-closure of the start state.
    list_id += 1
    active: list[State] = []
    add_state(active, start)

    for symbol in text:
        code = ord(symbol)
        list_id += 1  # new generation for the next active set
        reached: list[State] = []
        for state in active:
            if state.char_code != code:
                continue
            add_state(reached, state.out_edge)
        active = reached

    for state in active:
        if state.char_code == STATE_MATCH:
            return True
    return False


 # =========================
 # public API
 # =========================
 def match(pattern: str, text: str) -> bool:
    """Return True when regex *pattern* matches the whole of *text*.

    Raises:
        ValueError: If re2post rejects the pattern.
    """
    if (postfix := re2post(pattern)) is None:
        raise ValueError("invalid regex")

    return nfa_match(postfix_to_nfa(postfix), text)


 # =========================
 # tests
 # =========================
 def run_tests() -> None:
    """Run the sample cases, print each outcome and fail on any mismatch.

    Raises:
        AssertionError: If at least one result differs from its expected
            value; raised after all cases have been printed.
    """
    tests = [
        ("a", "a", True),
        ("a", "b", False),
        ("ab", "ab", True),
        ("a|b", "b", True),
        ("a*", "", True),
        ("a*", "aaa", True),
        ("(ab)*", "abab", True),
        ("a(b|c)*", "abcbcbc", True),
        ("a(b|c)*", "accc", True),
        ("a(b|c)*", "b", False),
    ]

    failures: list[tuple[str, str]] = []
    for pat, text, expected in tests:
        result = match(pat, text)
        print(f"{pat!r:10} {text!r:10} -> {result} (expected {expected})")
        if result != expected:
            failures.append((pat, text))

    # Printing alone lets regressions pass silently; make them visible.
    if failures:
        raise AssertionError(
            f"{len(failures)} of {len(tests)} cases failed: {failures}"
        )


 # Run the sample cases when this file is executed as a script.
 if __name__ == "__main__":
    run_tests()
| 구분 | DFA | NFA |
| --- | --- | --- |
| 각 입력에 대한 다음 상태 | 정확히 1개 | 0개, 1개, 또는 여러 개 |
| ε-전이 | 없음 | 있음 |
| 현재 상태 | 항상 1개 | 여러 개 동시 가능 |
| 구현 | 단순 | 시뮬레이션 필요 |
	# 원본 링크: https://swtch.com/~rsc/regexp/nfa.c.txt
	# 원본 C코드는 이해하기 어려워서 Python으로 변환한 코드

	# =========================
	# constants
	# =========================
	# Sentinel codes stored in State.char_code for the two non-literal states.
	# Literal states store ord(ch), and a Python str character may be any code
	# point up to 0x10FFFF, so the sentinels must lie above that range.  The
	# former values 256/257 were equal to ord('\u0100') and ord('\u0101').
	STATE_MATCH = 0x110000  # accepting state
	STATE_SPLIT = 0x110001  # epsilon-transition fork with two outgoing edges


	# =========================
	# NFA state
	# =========================
	class State:
	    """A single NFA state.

	    Attributes:
	        char_code: ord() of the literal this state consumes, or one of the
	            STATE_MATCH / STATE_SPLIT sentinels.
	        out_edge: Successor state, or None while not connected.
	        out_edge1: Second successor, followed only for STATE_SPLIT states.
	        lastlist: Value of the global list_id when this state was last put
	            on a state list; add_state uses it to skip duplicates.
	    """

	    def __init__(
	        self,
	        char_code: int,
	        out_edge: 'State | None' = None,
	        out_edge1: 'State | None' = None,
	    ) -> None:
	        self.char_code = char_code
	        self.out_edge = out_edge
	        self.out_edge1 = out_edge1
	        # 0 never equals a live list_id: nfa_match increments list_id
	        # before it builds its first state list.
	        self.lastlist = 0


	# =========================
	# fragment (build-time only)
	# =========================
	class Fragment:
	    """Partially built NFA piece; lives only on the postfix_to_nfa stack.

	    Attributes:
	        start: Entry state of this fragment.
	        out_states: States whose outgoing edge is not connected yet.
	    """

	    def __init__(self, start: 'State', out_states: list['State']):
	        self.start = start
	        self.out_states = out_states


	# =========================
	# infix -> postfix
	# =========================
	def re2post(pattern: str) -> str \| None:
	"""
	정규식을 postfix로 변환. 암묵적 연결을 '.'로 명시화.
	예: "ab" -> "ab.", "a\|b" -> "ab\|", "ab" -> "ab."
	실패 시 None 반환.
	"""
	output: list[str] = []
	stack: list[tuple[int, int]] = [] # 괄호 진입 시 (nalt, natom) 저장

	nalt = 0 # 현재 레벨의 \| 개수
	natom = 0 # 현재 레벨의 피연산자 개수

	for ch in pattern:
	match ch:
	case '(':
	# 연결 보류, 현재 상태 저장 후 새 레벨 시작
	if natom > 1:
	natom -= 1
	output.append('.')
	stack.append((nalt, natom))
	nalt = natom = 0

	case '\|':
	# 지금까지의 원자들 연결 후 alternation 카운트
	if natom == 0:
	return None
	while natom > 1:
	natom -= 1
	output.append('.')
	nalt += 1
	natom = 0

	case ')':
	# 괄호 내부 마무리: 연결 → alternation → 이전 레벨 복원
	if not stack or natom == 0:
	return None
	while natom > 1:
	natom -= 1
	output.append('.')
	while nalt > 0:
	output.append('\|')
	nalt -= 1
	nalt, natom = stack.pop()
	natom += 1 # 괄호 전체가 하나의 원자

	case '*':
	# 단항 연산자: 바로 출력
	if natom == 0:
	return None
	output.append('*')

	case _:
	# 리터럴: 이전 원자와 연결 필요하면 '.' 추가
	if natom > 1:
	natom -= 1
	output.append('.')
	output.append(ch)
	natom += 1

	if stack: # 괄호 안 닫힘
	return None

	# 남은 연결과 alternation 처리
	while natom > 1:
	natom -= 1
	output.append('.')
	while nalt > 0:
	output.append('\|')
	nalt -= 1

	return ''.join(output)


	# =========================
	# postfix -> NFA (Thompson)
	# =========================
	def postfix_to_nfa(postfix: str) -> State:
	"""
	postfix 정규식을 NFA 그래프로 변환 (Thompson 구성법).
	스택에 Fragment 쌓으면서 연산자마다 조립.
	최종 Fragment를 MATCH 상태에 연결 후 시작 상태 반환.
	"""
	stack: list[Fragment] = []

	for ch in postfix:
	match ch:
	case '.': # 연결: frag1 뒤에 frag2 붙임
	frag2 = stack.pop()
	frag1 = stack.pop()
	for s in frag1.out_states:
	s.out_edge = frag2.start
	stack.append(Fragment(frag1.start, frag2.out_states))

	case '\|': # 선택: SPLIT에서 양쪽으로 분기
	frag2 = stack.pop()
	frag1 = stack.pop()
	s = State(STATE_SPLIT, frag1.start, frag2.start)
	stack.append(Fragment(s, frag1.out_states + frag2.out_states))

	case '*': # 반복: SPLIT → frag → 다시 SPLIT (루프)
	frag = stack.pop()
	s = State(STATE_SPLIT, frag.start, None) # out1은 탈출용
	for st in frag.out_states:
	st.out_edge = s # frag 끝에서 SPLIT으로 복귀
	stack.append(Fragment(s, [s])) # s.out_edge1이 매달린 엣지

	case _: # 리터럴: 단일 상태 생성
	s = State(ord(ch))
	stack.append(Fragment(s, [s]))

	# 최종 조립: 매달린 엣지들을 MATCH에 연결
	frag = stack.pop()
	match_state = State(STATE_MATCH)
	for s in frag.out_states:
	s.out_edge = match_state

	return frag.start


	# =========================
	# NFA execution (list + id)
	# =========================
	list_id = 0 # Global generation counter; nfa_match increments it before building each state list.


	def add_state(states: list[State], s: State | None) -> None:
	    """Append *s* and its epsilon-closure to *states*.

	    STATE_SPLIT states are expanded through both of their edges.  A state
	    whose lastlist already equals the global list_id is skipped, so each
	    state is appended at most once per generation and cycles terminate.

	    The traversal uses an explicit stack instead of recursion so that long
	    chains of SPLIT states cannot exceed the interpreter recursion limit.

	    Args:
	        states: List being built; extended in place.
	        s: State to add; None is ignored.
	    """
	    pending = [s]
	    while pending:
	        state = pending.pop()
	        if state is None or state.lastlist == list_id:
	            continue  # missing edge, or already added in this generation

	        state.lastlist = list_id
	        states.append(state)

	        if state.char_code == STATE_SPLIT:
	            # LIFO order: push out_edge1 first so that out_edge and
	            # everything below it is expanded before out_edge1.
	            pending.append(state.out_edge1)
	            pending.append(state.out_edge)


	def nfa_match(start: State, text: str) -> bool:
	    """Simulate the NFA over *text* and report whether it accepts.

	    The set of active states is advanced once per input character; the text
	    is accepted when a STATE_MATCH state is active after the last character.

	    Args:
	        start: Start state of the NFA.
	        text: Input to match in full.

	    Returns:
	        True if the final active set contains a STATE_MATCH state.
	    """
	    global list_id

	    # Initial active set: epsilon-closure of the start state.
	    current: list[State] = []
	    list_id += 1
	    add_state(current, start)

	    for ch in text:
	        code = ord(ch)
	        next_states: list[State] = []
	        list_id += 1  # new generation for the next active set

	        for s in current:
	            if s.char_code == code:
	                add_state(next_states, s.out_edge)

	        current = next_states

	    return any(s.char_code == STATE_MATCH for s in current)


	# =========================
	# public API
	# =========================
	def match(pattern: str, text: str) -> bool:
	    """Return True when regex *pattern* matches the whole of *text*.

	    Raises:
	        ValueError: If re2post rejects the pattern.
	    """
	    postfix = re2post(pattern)
	    if postfix is None:
	        raise ValueError("invalid regex")

	    start = postfix_to_nfa(postfix)
	    return nfa_match(start, text)


	# =========================
	# tests
	# =========================
	def run_tests() -> None:
	    """Run the sample cases, print each outcome and fail on any mismatch.

	    Raises:
	        AssertionError: If at least one result differs from its expected
	            value; raised after all cases have been printed.
	    """
	    tests = [
	        ("a", "a", True),
	        ("a", "b", False),
	        ("ab", "ab", True),
	        ("a|b", "b", True),
	        ("a*", "", True),
	        ("a*", "aaa", True),
	        ("(ab)*", "abab", True),
	        ("a(b|c)*", "abcbcbc", True),
	        ("a(b|c)*", "accc", True),
	        ("a(b|c)*", "b", False),
	    ]

	    failures: list[tuple[str, str]] = []
	    for pat, text, expected in tests:
	        result = match(pat, text)
	        print(f"{pat!r:10} {text!r:10} -> {result} (expected {expected})")
	        if result != expected:
	            failures.append((pat, text))

	    # Printing alone lets regressions pass silently; make them visible.
	    if failures:
	        raise AssertionError(
	            f"{len(failures)} of {len(tests)} cases failed: {failures}"
	        )


	# Run the sample cases when this file is executed as a script.
	if __name__ == "__main__":
	    run_tests()