Claude reminder hook that works within subagents and within agentic loops
#!/usr/bin/env python3
"""
PostToolUse hook: periodic LEARNINGS.md reminder.

On its first invocation for an agent/subagent, it outputs the full LEARNINGS.md
instructions. Thereafter, it outputs a short reminder about every 10 assistant
turns. It does nothing if no LEARNINGS.md exists in the project root.

This works on a per-agent or per-subagent basis. That's tricky because hooks
don't tell us which agent they fired for. We work around this by looking for
the transcript file that contains this hook's tool_use_id in its most recent
assistant turn. This works because Claude gets the assistant response back
and writes it in its entirety to the transcript file (as a sequence of
assistant lines), and only afterwards starts processing tool uses. Therefore,
by the time PostToolUse fires, the tool_use for this hook is assuredly in the
most recent assistant turn. We rely on the disk layout of {session_id}.jsonl
for the main agent and {session_id}/subagents/*.jsonl for subagent
transcripts. We keep a per-session cache "{session_id}.cache.json" that
combines main and subagents, and parse only incremental updates to the
session and subagent transcript files, tracking each file's byte length and
protecting the cache with flock.

Aside from that cache, this hook is stateless: it determines when to fire by
scanning the transcript to identify the assistant-turn index for the current
tool_use_id. That's a lot of file to read! It tries to be fast by using
string-match heuristics instead of json-parsing every line.
"""
import fcntl
import glob
import json
import os
import sys
from typing import TypedDict, cast

FIRST_MESSAGE = """\
<system-reminder>
There is a file LEARNINGS.md in this project.
As you work, consult LEARNINGS.md to build on previous experience. When you
encounter a mistake that seems like it could be common, check LEARNINGS.md
for relevant notes — and if nothing is written yet, record what you learned.
Guidelines:
- Record insights about problem constraints, strategies that worked or failed,
  and lessons learned
- Update or remove memories that turn out to be wrong or outdated
- Organize memory semantically by topic, not chronologically
- Keep it under 200 lines. If it's longer, use additional files
  `LEARNINGS-{TOPIC}.md` and reference them from the main file.
- Use the Write and Edit tools to update the file
- Since this file is project-scope and shared with your team via version
  control, tailor your memories to this project
</system-reminder>"""

REMINDER_MESSAGE = "<system-reminder>Consult LEARNINGS.md and update it if you learn something new.</system-reminder>"

class CacheEntry(TypedDict):
    byte_offset: int
    latest_tool_use_ids: list[str]
    in_assistant: bool  # whether the file ended mid-assistant-clump when last read


type Cache = dict[str, CacheEntry]  # relpath (relative to transcript dir) -> CacheEntry
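# For illustration, a cache file for session "abc123" might look like this
# (offsets, ids, and the subagent filename are hypothetical):
#   {"abc123.jsonl": {"byte_offset": 51234,
#                     "latest_tool_use_ids": ["toolu_01Xyz"],
#                     "in_assistant": false},
#    "abc123/subagents/agent-1.jsonl": {"byte_offset": 812,
#                                       "latest_tool_use_ids": [],
#                                       "in_assistant": true}}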

def update_cache(transcript_dir: str, session_id: str) -> Cache:
    """Load and incrementally update the per-session cache of transcript tool_use_ids.

    Returns the updated cache (keyed by rel_path from transcript_dir).
    The cache (at transcript_dir/{session_id}.cache.json) is flock-protected
    so that parallel hook invocations don't corrupt it.

    Invariant: we can trust that at the moment we're invoked, the transcript
    files for the main agent and subagents contain only complete jsonl lines.
    """
    cache_path = os.path.join(transcript_dir, session_id + ".cache.json")
    os.makedirs(transcript_dir, exist_ok=True)
    lock_fd = open(cache_path, "a+")
    fcntl.flock(lock_fd, fcntl.LOCK_EX)
    try:
        lock_fd.seek(0)
        raw = lock_fd.read()
        if raw.strip():
            try:
                cache = cast(Cache, json.loads(raw))
            except json.JSONDecodeError:
                # Corrupt cache: reset it on disk and start from scratch.
                cache = {}
                lock_fd.seek(0)
                lock_fd.truncate()
                lock_fd.write("{}")
                lock_fd.flush()
        else:
            cache = {}
        # The main-agent transcript (if present), plus any subagent transcripts.
        rel_paths = [session_id + ".jsonl"] if os.path.isfile(os.path.join(transcript_dir, session_id + ".jsonl")) else []
        for path in glob.glob(os.path.join(transcript_dir, session_id, "subagents", "*.jsonl")):
            rel_paths.append(os.path.relpath(path, transcript_dir))
        for rel_path in rel_paths:
            entry = cache.get(rel_path) or CacheEntry(byte_offset=0, latest_tool_use_ids=[], in_assistant=False)
            cache[rel_path] = _update_file_entry(transcript_dir, rel_path, entry)
        lock_fd.seek(0)
        lock_fd.truncate()
        lock_fd.write(json.dumps(cache))
        lock_fd.flush()
        return cache
    finally:
        fcntl.flock(lock_fd, fcntl.LOCK_UN)
        lock_fd.close()

def _update_file_entry(directory: str, rel_path: str, entry: CacheEntry) -> CacheEntry:
    """Read new bytes from a transcript file and update the cache entry."""
    full_path = os.path.join(directory, rel_path)
    try:
        file_size = os.path.getsize(full_path)
    except OSError:
        return entry
    cached_offset = entry.get("byte_offset", 0)
    if file_size == cached_offset:
        return entry
    if file_size < cached_offset:
        cached_offset = 0  # file shrunk — reparse
    try:
        with open(full_path, "rb") as f:
            f.seek(cached_offset)
            new_bytes = f.read()
    except OSError:
        return entry
    # Collect tool_use_ids from the most recent clump of consecutive assistant
    # entries. Reset when a new clump starts (assistant after non-assistant).
    latest_ids = entry.get("latest_tool_use_ids", [])
    saw_non_assistant = not entry.get("in_assistant", False)
    for line in new_bytes.split(b"\n"):
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except json.JSONDecodeError:
            continue
        if obj.get("type") == "assistant":
            if saw_non_assistant:
                latest_ids = []  # new clump — discard previous
            saw_non_assistant = False
            raw_content = obj.get("message", {}).get("content", [])
            if isinstance(raw_content, list):
                content = cast(list[dict[str, object]], raw_content)
                ids = [str(b["id"]) for b in content if b.get("type") == "tool_use" and "id" in b]
                latest_ids.extend(ids)
        else:
            saw_non_assistant = True
    return {"byte_offset": file_size, "latest_tool_use_ids": latest_ids, "in_assistant": not saw_non_assistant}

# --- Transcript analysis ---

def every_n_turns(transcript_path: str, self_tool_use_id: str, frequency: int) -> int | None:
    """For PostToolUse hooks that want to fire every {frequency} assistant turns.

    A "turn" is a clump of consecutive assistant lines, broken only by user
    lines (other line types like system/progress don't break a clump). An
    assistant line has type="assistant" and message.role="assistant"; a user
    line has type="user" and message.role="user". But we don't want to
    json-parse every single line of the (very long) transcript! So this
    function uses string-matching heuristics instead.

    It works by scanning the transcript file. We decree that the first
    tool_use after an N-turn boundary is the one that triggers the reminder.
    Returns 0 for the first trigger in the transcript, >0 for subsequent
    triggers, and None otherwise. See the worked example after this function.
    """
    is_in_assistant_turn = False
    assistant_turn_index = -1
    min_turn_of_next_trigger = 0
    has_seen_tool_this_turn = False
    with open(transcript_path) as f:
        for line in f:
            prefix = line[:1024]
            if ',"type":"progress","data":{' in prefix:
                # skip progress lines, so the string-match tests below don't fire false positives
                continue
            elif '"type":"user","message":{"role":"user",' in prefix:
                is_in_assistant_turn = False
            elif '"type":"message"' in prefix and '"role":"assistant"' in prefix:
                if not is_in_assistant_turn:
                    assistant_turn_index += 1
                    is_in_assistant_turn = True
                    has_seen_tool_this_turn = False
                if '"type":"tool_use"' in prefix:
                    if assistant_turn_index >= min_turn_of_next_trigger and not has_seen_tool_this_turn:
                        has_seen_tool_this_turn = True
                        if self_tool_use_id in line:
                            # invariant: self_tool_use_id is necessarily in the most recent assistant turn
                            return min_turn_of_next_trigger
                        else:
                            min_turn_of_next_trigger = assistant_turn_index + frequency
    return None
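
# Worked example with frequency=10: the first tool_use in turn 0 is the first
# trigger; if it is this hook's own tool_use we return 0 (full instructions).
# Otherwise the trigger floor moves to turn 0+10, the first tool_use in a turn
# at or after turn 10 becomes the next trigger (returning 10, i.e. the short
# reminder, if it's ours), and so on.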

def main() -> None:
    # A PostToolUse hook receives a JSON payload describing the tool call on its stdin.
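    # For illustration, roughly (values hypothetical; unused fields omitted):
    #   {"session_id": "abc123",
    #    "transcript_path": "~/.claude/projects/myproject/abc123.jsonl",
    #    "cwd": "/home/user/myproject",
    #    "tool_use_id": "toolu_01Abc"}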
    input_data = json.loads(sys.stdin.read())
    tool_use_id: str = input_data["tool_use_id"]
    session_id: str = input_data["session_id"]
    transcript_path: str = input_data["transcript_path"]
    cwd: str = input_data["cwd"]
    learnings_path = os.path.join(cwd, "LEARNINGS.md")
    if os.path.isfile(learnings_path):
        transcript_dir = os.path.dirname(transcript_path)
        cache = update_cache(transcript_dir, session_id)
        rel_path = next((p for p, e in cache.items() if tool_use_id in e["latest_tool_use_ids"]), None)
        if rel_path is not None:
            i = every_n_turns(os.path.join(transcript_dir, rel_path), tool_use_id, 10)
            if i is not None:
                print(json.dumps({"hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": FIRST_MESSAGE if i == 0 else REMINDER_MESSAGE}}))


if __name__ == "__main__":
    main()