Claude reminder hook that works within subagents and within agentic loops
#!/usr/bin/env python3
"""
PostToolUse hook: periodic LEARNINGS.md reminder.
On first invocation for an agent/subagent, outputs the full LEARNINGS.md
instructions. Thereafter, outputs a short reminder about every 10 assistant turns.
Does nothing if no LEARNINGS.md exists in the project root.
This works on a per-agent or per-subagent basis. This is tricky because
hooks don't tell us who they fired for. We work around this by looking for
which transcript file contains this hook's tool_use_id in its most recent
assistant turn. This works because claude gets the assistant response back
and writes it in its entirety to the transcript file (as a sequence of assistant
lines), and only afterwards does it start processing tool-uses. Therefore
by the time it gets to PostToolUseHook then the assistant tool-use for that
hook is assuredly the most recent assistant turn. We rely on the disk
layout of {session_id}.jsonl for the main agent and {session_id}/subagents/*.jsonl
for subagent transcripts. We keep a per-session cache "{session_id}.cache.json"
that combines main and subagents, and parse only incremental updates to the session
and subagent transcript files, tracking each file's byte length and protecting
the cache with flock.
This hook is stateless: it determines when to fire by scanning the transcript
to identify the assistant turn index for the current tool_use_id.
That's a lot of file to read! It tries to be fast by using string-match
heuristics instead of json-parsing.
"""
import fcntl
import glob
import json
import os
import sys
from typing import TypedDict, cast

FIRST_MESSAGE = """\
<system-reminder>
There is a file LEARNINGS.md in this project.
As you work, consult LEARNINGS.md to build on previous experience. When you
encounter a mistake that seems like it could be common, check LEARNINGS.md
for relevant notes — and if nothing is written yet, record what you learned.
Guidelines:
- Record insights about problem constraints, strategies that worked or failed,
  and lessons learned
- Update or remove memories that turn out to be wrong or outdated
- Organize memory semantically by topic, not chronologically
- Keep it under 200 lines. If it's longer, use additional files
  `LEARNINGS-{TOPIC}.md` and reference them from the main file.
- Use the Write and Edit tools to update the file
- Since this file is project-scope and shared with your team via version
  control, tailor your memories to this project
</system-reminder>"""
REMINDER_MESSAGE = "<system-reminder>Consult LEARNINGS.md and update it if you learn something new.</system-reminder>"

class CacheEntry(TypedDict):
    byte_offset: int
    latest_tool_use_ids: list[str]
    in_assistant: bool  # whether the file ended mid-assistant-clump when last read

type Cache = dict[str, CacheEntry]  # rel_path (relative to transcript_dir) -> CacheEntry
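
# Illustrative cache file contents (hypothetical session and tool_use ids):
#   {"abc123.jsonl": {"byte_offset": 52311, "latest_tool_use_ids": ["toolu_01A"], "in_assistant": false},
#    "abc123/subagents/xyz.jsonl": {"byte_offset": 812, "latest_tool_use_ids": [], "in_assistant": true}}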

def update_cache(transcript_dir: str, session_id: str) -> Cache:
    """Load and incrementally update the per-session cache of transcript tool_use_ids.

    Returns the updated cache (keyed by rel_path from transcript_dir).
    The cache (at transcript_dir/{session_id}.cache.json) is flock-protected
    so parallel hook invocations don't corrupt it.

    Invariant: we can trust that at the moment we're invoked, all transcript
    files for the main agent and subagents have complete jsonl lines.
    """
    cache_path = os.path.join(transcript_dir, session_id + ".cache.json")
    os.makedirs(transcript_dir, exist_ok=True)
    lock_fd = open(cache_path, "a+")  # "a+" creates the file if missing without truncating it
    fcntl.flock(lock_fd, fcntl.LOCK_EX)
    try:
        lock_fd.seek(0)
        raw = lock_fd.read()
        if raw.strip():
            try:
                cache = cast(Cache, json.loads(raw))
            except json.JSONDecodeError:
                # Corrupt cache: reset it on disk and reparse transcripts from scratch.
                cache = {}
                lock_fd.seek(0)
                lock_fd.truncate()
                lock_fd.write("{}")
                lock_fd.flush()
        else:
            cache = {}
        rel_paths = [session_id + ".jsonl"] if os.path.isfile(os.path.join(transcript_dir, session_id + ".jsonl")) else []
        for path in glob.glob(os.path.join(transcript_dir, session_id, "subagents", "*.jsonl")):
            rel_paths.append(os.path.relpath(path, transcript_dir))
        for rel_path in rel_paths:
            entry = cache.get(rel_path) or CacheEntry(byte_offset=0, latest_tool_use_ids=[], in_assistant=False)
            cache[rel_path] = _update_file_entry(transcript_dir, rel_path, entry)
        lock_fd.seek(0)
        lock_fd.truncate()
        lock_fd.write(json.dumps(cache))
        lock_fd.flush()
        return cache
    finally:
        fcntl.flock(lock_fd, fcntl.LOCK_UN)
        lock_fd.close()


def _update_file_entry(directory: str, rel_path: str, entry: CacheEntry) -> CacheEntry:
    """Read new bytes from a transcript file and update the cache entry."""
    full_path = os.path.join(directory, rel_path)
    try:
        file_size = os.path.getsize(full_path)
    except OSError:
        return entry
    cached_offset = entry.get("byte_offset", 0)
    if file_size == cached_offset:
        return entry
    if file_size < cached_offset:
        cached_offset = 0  # file shrunk — reparse
    try:
        with open(full_path, "rb") as f:
            f.seek(cached_offset)
            new_bytes = f.read()
    except OSError:
        return entry
    # Collect tool_use_ids from the most recent clump of consecutive assistant
    # entries. Reset when a new clump starts (assistant after non-assistant).
    latest_ids = entry.get("latest_tool_use_ids", [])
    saw_non_assistant = not entry.get("in_assistant", False)
    for line in new_bytes.split(b"\n"):
        line = line.strip()
        if not line:
            continue
        try:
            obj = json.loads(line)
        except json.JSONDecodeError:
            continue
        if obj.get("type") == "assistant":
            if saw_non_assistant:
                latest_ids = []  # new clump — discard previous
            saw_non_assistant = False
            raw_content = obj.get("message", {}).get("content", [])
            if isinstance(raw_content, list):
                content = cast(list[dict[str, object]], raw_content)
                ids = [str(b["id"]) for b in content if b.get("type") == "tool_use" and "id" in b]
                latest_ids.extend(ids)
        else:
            saw_non_assistant = True
    return {"byte_offset": file_size, "latest_tool_use_ids": latest_ids, "in_assistant": not saw_non_assistant}


# --- Transcript analysis ---

def every_n_turns(transcript_path: str, self_tool_use_id: str, frequency: int) -> int | None:
    """For PostToolUse hooks that want to fire every {frequency} assistant turns.

    A "turn" is a clump of consecutive assistant lines, broken only by user lines
    (other line types like system/progress don't break a clump).
    An assistant line has type="assistant" and message.role="assistant".
    A user line has type="user" and message.role="user".
    But I don't want to have to json-parse every single line in the (very long)
    transcript! So this function uses string-matching heuristics instead.

    The function works by scanning the transcript file. We decree that the first
    tool_use after an N-turn boundary is the one that triggers the reminder.
    Returns 0 for the first trigger in the transcript, >0 for subsequent
    triggers, and None otherwise.
    """
    is_in_assistant_turn = False
    assistant_turn_index = -1
    min_turn_of_next_trigger = 0
    has_seen_tool_this_turn = False
    with open(transcript_path) as f:
        for line in f:
            prefix = line[:1024]
            if ',"type":"progress","data":{' in prefix:
                # to avoid the following string-match tests firing false positives
                continue
            elif '"type":"user","message":{"role":"user",' in prefix:
                is_in_assistant_turn = False
            elif '"type":"message"' in prefix and '"role":"assistant"' in prefix:
                if not is_in_assistant_turn:
                    assistant_turn_index += 1
                    is_in_assistant_turn = True
                    has_seen_tool_this_turn = False
                if '"type":"tool_use"' in prefix:
                    if assistant_turn_index >= min_turn_of_next_trigger and not has_seen_tool_this_turn:
                        has_seen_tool_this_turn = True
                        if self_tool_use_id in line:
                            # invariant: self_tool_use_id is necessarily in the most recent assistant turn
                            return min_turn_of_next_trigger
                        else:
                            min_turn_of_next_trigger = assistant_turn_index + frequency
    return None
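
# Worked example with frequency=10: the invocation whose tool_use is the first
# one in turn 0 gets back 0 (so main() emits the full FIRST_MESSAGE); the
# invocation whose tool_use is first in the next eligible turn (index >= 10)
# gets back 10 (a short reminder); and so on, roughly every 10 assistant
# turns. Every other invocation gets None and stays silent.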

def main() -> None:
    # PostToolUse hooks receive a JSON payload on stdin.
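    # Illustrative, abridged example (field names beyond those read below are assumed):
    #   {"session_id": "...", "transcript_path": ".../{session_id}.jsonl",
    #    "cwd": "/path/to/project", "hook_event_name": "PostToolUse",
    #    "tool_use_id": "toolu_...", "tool_name": "Bash", ...}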
    input_data = json.loads(sys.stdin.read())
    tool_use_id: str = input_data["tool_use_id"]
    session_id: str = input_data["session_id"]
    transcript_path: str = input_data["transcript_path"]
    cwd: str = input_data["cwd"]
    learnings_path = os.path.join(cwd, "LEARNINGS.md")
    if os.path.isfile(learnings_path):
        transcript_dir = os.path.dirname(transcript_path)
        cache = update_cache(transcript_dir, session_id)
        rel_path = next((p for p, e in cache.items() if tool_use_id in e["latest_tool_use_ids"]), None)
        if rel_path is not None:
            i = every_n_turns(os.path.join(transcript_dir, rel_path), tool_use_id, 10)
            if i is not None:
                print(json.dumps({"hookSpecificOutput": {"hookEventName": "PostToolUse", "additionalContext": FIRST_MESSAGE if i == 0 else REMINDER_MESSAGE}}))


if __name__ == "__main__":
    main()
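
# One way to register this hook (a sketch: this is the Claude Code hooks
# settings format as I understand it, and the script path below is
# hypothetical). In .claude/settings.json:
#   {"hooks": {"PostToolUse": [
#       {"hooks": [{"type": "command", "command": "python3 .claude/hooks/learnings_reminder.py"}]}
#   ]}}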