Claude reminder hook that works within subagents and within agentic loops
#!/usr/bin/env python3
"""
PostToolUse hook: periodic LEARNINGS.md reminder.
On first invocation for an agent/subagent, outputs the full LEARNINGS.md
instructions. Thereafter, outputs a short reminder about every 10 assistant turns.
Does nothing if no LEARNINGS.md exists in the project root.
This works on a per-agent or per-subagent basis. This is tricky because
hooks don't tell us who they fired for. We work around this by looking for
which transcript file contains this hook's tool_use_id in its most recent
assistant turn. This works because claude gets the assistant response back
and writes it in its entirety to the transcript file (as a sequence of assistant
lines), and only afterwards does it start processing tool-uses. Therefore
by the time it gets to PostToolUseHook then the assistant tool-use for that
hook is assuredly the most recent assistant turn. We rely on the disk
layout of {session_id}.jsonl for the main agent and {session_id}/subagents/*.jsonl
for subagent transcripts. We keep a per-session cache "{session_id}.cache.json"
that combines main and subagents, and parse only incremental updates to the session
and subagent transcript files, tracking each file's byte length and protecting
the cache with flock.
This hook is stateless: it determines when to fire by scanning the transcript
to identify the assistant turn index for the current tool_use_id.
That's a lot of file to read! It tries to be fast by using string-match
heuristics instead of json-parsing.
"""
import fcntl
import glob
import json
import os
import sys
from typing import TypedDict, cast

FIRST_MESSAGE = """\
<system-reminder>
There is a file LEARNINGS.md in this project.
As you work, consult LEARNINGS.md to build on previous experience. When you
encounter a mistake that seems like it could be common, check LEARNINGS.md
for relevant notes — and if nothing is written yet, record what you learned.
Guidelines:
- Record insights about problem constraints, strategies that worked or failed,
  and lessons learned
- Update or remove memories that turn out to be wrong or outdated
- Organize memory semantically by topic, not chronologically
- Keep it under 200 lines. If it's longer, use additional files
  `LEARNINGS-{TOPIC}.md` and reference them from the main file.
- Use the Write and Edit tools to update the file
- Since this file is project-scope and shared with your team via version
  control, tailor your memories to this project
</system-reminder>"""
REMINDER_MESSAGE = "<system-reminder>Consult LEARNINGS.md and update it if you learn something new.</system-reminder>"

class CacheEntry(TypedDict):
    byte_offset: int
    latest_tool_use_ids: list[str]
    in_assistant: bool  # whether the file ended mid-assistant-clump when last read

type Cache = dict[str, CacheEntry]  # rel_path (relative to transcript_dir) -> CacheEntry
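
# Illustrative cache file contents (hypothetical session and tool_use ids):
#   {"abc123.jsonl": {"byte_offset": 52311, "latest_tool_use_ids": ["toolu_01A"], "in_assistant": false},
#    "abc123/subagents/xyz.jsonl": {"byte_offset": 812, "latest_tool_use_ids": [], "in_assistant": true}}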

def update_cache(transcript_dir: str, session_id: str) -> Cache:
    """Load and incrementally update the per-session cache of transcript tool_use_ids.

    Returns the updated cache (keyed by rel_path from transcript_dir).
    The cache (at transcript_dir/{session_id}.cache.json) is flock-protected
    so parallel hook invocations don't corrupt it.

    Invariant: we can trust that at the moment we're invoked, all transcript
    files for the main agent and subagents have complete jsonl lines.
    """
    cache_path = os.path.join(transcript_dir, session_id + ".cache.json")
    os.makedirs(transcript_dir, exist_ok=True)
    lock_fd = open(cache_path, "a+")  # "a+" creates the file if missing without truncating it
    fcntl.flock(lock_fd, fcntl.LOCK_EX)
    try:
        lock_fd.seek(0)
        raw = lock_fd.read()
        if raw.strip():
            try:
                cache = cast(Cache, json.loads(raw))
            except json.JSONDecodeError:
                # Corrupt cache: reset it on disk and reparse transcripts from scratch.
                cache = {}
                lock_fd.seek(0)
                lock_fd.truncate()
                lock_fd.write("{}")
                lock_fd.flush()
        else:
            cache = {}
        rel_paths = [session_id + ".jsonl"] if os.path.isfile(os.path.join(transcript_dir, session_id + ".jsonl")) else []
        for path in glob.glob(os.path.join(transcript_dir, session_id, "subagents", "*.jsonl")):
            rel_paths.append(os.path.relpath(path, transcript_dir))
        for rel_path in rel_paths:
            entry = cache.get(rel_path) or CacheEntry(byte_offset=0, latest_tool_use_ids=[], in_assistant=False)
            cache[rel_path] = _update_file_entry(transcript_dir, rel_path, entry)
        lock_fd.seek(0)
        lock_fd.truncate()
        lock_fd.write(json.dumps(cache))
        lock_fd.flush()
        return cache
    finally:
        fcntl.flock(lock_fd, fcntl.LOCK_UN)
        lock_fd.close()


def _update_file_entry(directory: str, rel_path: str, entry: CacheEntry) -> CacheEntry:
    """Read new bytes from a transcript file and update the cache entry."""
    full_path = os.path.join(directory, rel_path)
    try:
        file_size = os.path.getsize(full_path)
    except OSError:
        return entry
    cached_offset = entry.get("byte_offset", 0)
    if file_size == cached_offset:
        return entry
    if file_size < cached_offset:
        cached_offset = 0  # file shrunk — reparse
    try:
        with open(full_path, "rb") as f:
            f.seek(cached_offset)
            new_bytes = f.read()
    except OSError:
        return entry
    # Collect tool_use_ids from the most recent clump of consecutive assistant
    # entries. Reset when a new clump starts (assistant after non-assistant).
    latest_ids = entry.get("latest_tool_use_ids", [])
    saw_non_assistant = not entry.get("in_assistant", False)
    for line in new_bytes.split(b"\n"):
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except json.JSONDecodeError:
            continue
        if obj.get("type") == "assistant":
            if saw_non_assistant:
                latest_ids = []  # new clump — discard previous
            saw_non_assistant = False
            raw_content = obj.get("message", {}).get("content", [])
            if isinstance(raw_content, list):
                content = cast(list[dict[str, object]], raw_content)
                ids = [str(b["id"]) for b in content if b.get("type") == "tool_use" and "id" in b]
                latest_ids.extend(ids)
        else:
            saw_non_assistant = True
    return {"byte_offset": file_size, "latest_tool_use_ids": latest_ids, "in_assistant": not saw_non_assistant}


# --- Transcript analysis ---

def every_n_turns(transcript_path: str, self_tool_use_id: str, frequency: int) -> int | None:
    """For PostToolUse hooks that want to fire every {frequency} assistant turns.

    A "turn" is a clump of consecutive assistant lines, broken only by user lines
    (other line types like system/progress don't break a clump).
    An assistant line has type="assistant" and message.role="assistant".
    A user line has type="user" and message.role="user".
    But I don't want to have to json-parse every single line in the (very long)
    transcript! So this function uses string-matching heuristics instead.

    The function works by scanning the transcript file. We decree that the first
    tool_use after an N-turn boundary is the one that triggers the reminder.
    Returns 0 for the first trigger in the transcript, >0 for subsequent
    triggers, and None otherwise.
    """
    is_in_assistant_turn = False
    assistant_turn_index = -1
    min_turn_of_next_trigger = 0
    has_seen_tool_this_turn = False
    with open(transcript_path) as f:
        for line in f:
            prefix = line[:1024]
            if ',"type":"progress","data":{' in prefix:
                # to avoid the following string-match tests firing false positives
                continue
            elif '"type":"user","message":{"role":"user",' in prefix:
                is_in_assistant_turn = False
            elif '"type":"message"' in prefix and '"role":"assistant"' in prefix:
                if not is_in_assistant_turn:
                    assistant_turn_index += 1
                    is_in_assistant_turn = True
                    has_seen_tool_this_turn = False
                if '"type":"tool_use"' in prefix:
                    if assistant_turn_index >= min_turn_of_next_trigger and not has_seen_tool_this_turn:
                        has_seen_tool_this_turn = True
                        if self_tool_use_id in line:
                            # invariant: self_tool_use_id is necessarily in the most recent assistant turn
                            return min_turn_of_next_trigger
                        else:
                            min_turn_of_next_trigger = assistant_turn_index + frequency
    return None
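
# Worked example with frequency=10: the invocation whose tool_use is the first
# one in turn 0 gets back 0 (so main() emits the full FIRST_MESSAGE); the
# invocation whose tool_use is first in the next eligible turn (index >= 10)
# gets back 10 (a short reminder); and so on, roughly every 10 assistant
# turns. Every other invocation gets None and stays silent.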

def main() -> None:
    # PostToolUse hooks receive a JSON payload on stdin.
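    # Illustrative, abridged example (field names beyond those read below are assumed):
    #   {"session_id": "...", "transcript_path": ".../{session_id}.jsonl",
    #    "cwd": "/path/to/project", "hook_event_name": "PostToolUse",
    #    "tool_use_id": "toolu_...", "tool_name": "Bash", ...}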
    input_data = json.loads(sys.stdin.read())
    tool_use_id: str = input_data["tool_use_id"]
    session_id: str = input_data["session_id"]
    transcript_path: str = input_data["transcript_path"]
    cwd: str = input_data["cwd"]
    learnings_path = os.path.join(cwd, "LEARNINGS.md")
    if os.path.isfile(learnings_path):
        transcript_dir = os.path.dirname(transcript_path)
        cache = update_cache(transcript_dir, session_id)
        rel_path = next((p for p, e in cache.items() if tool_use_id in e["latest_tool_use_ids"]), None)
        if rel_path is not None:
            i = every_n_turns(os.path.join(transcript_dir, rel_path), tool_use_id, 10)
            if i is not None:
                print(json.dumps({"hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": FIRST_MESSAGE if i == 0 else REMINDER_MESSAGE}}))


if __name__ == "__main__":
    main()
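
# One way to register this hook (a sketch: this is the Claude Code hooks
# settings format as I understand it, and the script path below is
# hypothetical). In .claude/settings.json:
#   {"hooks": {"PostToolUse": [
#       {"hooks": [{"type": "command", "command": "python3 .claude/hooks/learnings_reminder.py"}]}
#   ]}}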