Last active
February 13, 2026 14:36
-
-
Save oneryalcin/5da8e9fe1d3d8d927ac2ccf629d5fc70 to your computer and use it in GitHub Desktop.
Extract Claude Code session JSONL to lean readable markdown
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """Extract Claude Code session JSONL to lean readable markdown. | |
| Usage: python3 claude_session_extract.py <session.jsonl> [output.md] | |
| If output.md is omitted, writes to <session_id>.md in current directory. | |
| """ | |
| import json | |
| import sys | |
| from pathlib import Path | |
| # Limits (chars) | |
| EDIT_LIMIT = 10_000 | |
| WRITE_LIMIT = 20_000 | |
| TASK_PROMPT_LIMIT = 8_000 | |
| SUBAGENT_RESULT_LIMIT = 40_000 | |
| RESULT_LIMIT = 4_000 | |
def truncate(s, limit):
    """Strip surrounding whitespace from *s* and cap it at *limit* characters.

    When the stripped text is longer than *limit*, only the first *limit*
    characters are kept and a trailing notice states how many were dropped.
    """
    trimmed = s.strip()
    overflow = len(trimmed) - limit
    if overflow <= 0:
        # Fits within the budget: return the stripped text untouched.
        return trimmed
    return f"{trimmed[:limit]}\n... [{overflow} chars truncated]"
def extract_text(content_inner):
    """Return the plain text carried by a content value.

    A string is returned as-is. For a list, the ``text`` field of every dict
    item whose ``type`` is ``"text"`` is collected and joined with newlines;
    all other items are ignored. Any other value yields an empty string.
    """
    if isinstance(content_inner, str):
        return content_inner
    if not isinstance(content_inner, list):
        return ""

    pieces = []
    for item in content_inner:
        if isinstance(item, dict) and item.get("type") == "text":
            pieces.append(item.get("text", ""))
    return "\n".join(pieces)
def format_tool_use(block, task_tool_ids):
    """Render one ``tool_use`` content block as a compact ``[TOOL: ...]`` summary.

    Args:
        block: The ``tool_use`` dict; its ``name``, ``input`` and ``id`` keys
            are read.
        task_tool_ids: Set that is mutated in place: the id of every ``Task``
            call is added so that ``format_tool_result`` can later recognise
            the matching result as a sub-agent result.

    Returns:
        A string beginning with ``[TOOL: ...]``. Long payloads are shortened
        with ``truncate``, so every cut is marked in the output.
    """
    inline_limit = 6000  # budget for Bash commands and the generic fallback

    name = block.get("name", "?")
    tid = block.get("id", "")

    # A null or non-dict "input" would crash every lookup below; treat it as
    # an empty argument set instead.
    inp = block.get("input")
    if not isinstance(inp, dict):
        inp = {}

    def field(key):
        """Return inp[key] as a string; missing or null becomes ''."""
        value = inp.get(key)
        return "" if value is None else str(value)

    if name == "Task":
        task_tool_ids.add(tid)
        prompt = truncate(field("prompt"), TASK_PROMPT_LIMIT)
        return f"[TOOL: Task({field('description')})]\n{prompt}"

    if name == "Edit":
        old = truncate(field("old_string"), EDIT_LIMIT)
        new = truncate(field("new_string"), EDIT_LIMIT)
        return f"[TOOL: Edit({field('file_path')})]\n<<<OLD\n{old}\n===\n{new}\n>>>NEW"

    if name == "Write":
        body = truncate(field("content"), WRITE_LIMIT)
        return f"[TOOL: Write({field('file_path')})]\n```\n{body}\n```"

    if name in ("Read", "Glob"):
        # First non-empty of file_path / pattern / path identifies the target.
        summary = inp.get("file_path") or inp.get("pattern") or inp.get("path") or ""
        return f"[TOOL: {name}({summary})]"

    if name == "Grep":
        return f'[TOOL: Grep("{field("pattern")}" in {field("path")})]'

    if name == "Bash":
        # Go through truncate() so an over-long command is visibly marked as
        # cut, like every other branch, instead of being sliced silently.
        cmd = truncate(field("command"), inline_limit)
        return f"[TOOL: Bash] {cmd}"

    if name == "NotebookEdit":
        src = truncate(field("new_source"), WRITE_LIMIT)
        return f"[TOOL: NotebookEdit({field('notebook_path')})]\n```\n{src}\n```"

    # Unknown tool: dump its arguments. ensure_ascii=False keeps non-ASCII
    # text readable instead of \uXXXX escapes.
    dumped = truncate(json.dumps(inp, ensure_ascii=False), inline_limit)
    return f"[TOOL: {name}] {dumped}"
def format_tool_result(block, task_tool_ids):
    """Render one ``tool_result`` content block, or return "" when it has no text.

    A result whose ``tool_use_id`` is in *task_tool_ids* is labelled
    ``SUBAGENT_RESULT`` and capped at ``SUBAGENT_RESULT_LIMIT``; every other
    result is labelled ``RESULT`` and capped at ``RESULT_LIMIT``.
    """
    from_subagent = block.get("tool_use_id", "") in task_tool_ids
    body = extract_text(block.get("content", ""))
    if not body.strip():
        # Nothing but whitespace (or no text blocks at all): emit nothing.
        return ""

    if from_subagent:
        label, limit = "SUBAGENT_RESULT", SUBAGENT_RESULT_LIMIT
    else:
        label, limit = "RESULT", RESULT_LIMIT
    return f"[{label}]\n{truncate(body, limit)}"
def _render_message(obj, task_tool_ids):
    """Turn one parsed JSONL record into a markdown section, or None to skip it.

    Only dict records whose ``type`` is ``"user"`` or ``"assistant"`` are
    rendered. Records that produce no non-blank text yield None.
    """
    if not isinstance(obj, dict) or obj.get("type") not in ("user", "assistant"):
        return None

    msg = obj.get("message")
    if not isinstance(msg, dict):
        # Missing or null "message": nothing to read, but do not crash.
        msg = {}
    role = str(msg.get("role") or obj["type"]).upper()
    ts = obj.get("timestamp", "")
    content = msg.get("content", "")

    parts = []
    if isinstance(content, str):
        parts.append(content)
    elif isinstance(content, list):
        for block in content:
            if isinstance(block, str):
                parts.append(block)
                continue
            if not isinstance(block, dict):
                continue
            kind = block.get("type")
            if kind == "text":
                # "or ''" guards against an explicit null text field.
                parts.append(block.get("text") or "")
            elif kind == "tool_use":
                parts.append(format_tool_use(block, task_tool_ids))
            elif kind == "tool_result":
                rendered = format_tool_result(block, task_tool_ids)
                if rendered:
                    parts.append(rendered)

    text = "\n".join(p for p in parts if p.strip())
    if not text.strip():
        return None
    return f"## [{role}] {ts}\n\n{text}\n"


def extract_session(infile, outfile):
    """Convert a session JSONL file into a line-numbered markdown transcript.

    Args:
        infile: Path of the JSONL file to read, one JSON record per line.
        outfile: Path of the markdown file to write (overwritten if present).

    Every output line is prefixed with a running, right-aligned line number
    and `` | ``; one extra numbered blank line separates messages. A one-line
    summary (message count, output path, size in KB) is printed to stdout.

    Blank input lines are ignored. Lines that are not valid JSON are skipped
    with a warning on stderr, so a stray or partially written line no longer
    aborts the whole extraction.
    """
    task_tool_ids = set()
    messages = []

    # JSONL is UTF-8; be explicit so decoding does not depend on the
    # platform's locale encoding.
    with open(infile, encoding="utf-8") as f:
        for lineno, raw in enumerate(f, 1):
            if not raw.strip():
                continue
            try:
                obj = json.loads(raw)
            except json.JSONDecodeError as exc:
                print(
                    f"warning: {infile}:{lineno}: skipping malformed JSON ({exc.msg})",
                    file=sys.stderr,
                )
                continue
            section = _render_message(obj, task_tool_ids)
            if section is not None:
                messages.append(section)

    with open(outfile, "w", encoding="utf-8") as f:
        line_num = 1
        for msg in messages:
            for line in msg.split("\n"):
                f.write(f"{line_num:6d} | {line}\n")
                line_num += 1
            f.write(f"{line_num:6d} | \n")  # blank separator
            line_num += 1

    size_kb = Path(outfile).stat().st_size / 1024
    print(f"{len(messages)} messages -> {outfile} ({size_kb:.0f} KB)")
if __name__ == "__main__":
    # Script entry point: <session.jsonl> is required, [output.md] is optional.
    cli_args = sys.argv[1:]
    if not cli_args:
        print(__doc__)
        sys.exit(1)

    inpath = cli_args[0]
    # Without an explicit output path, write <input stem>.md in the current directory.
    outpath = cli_args[1] if len(cli_args) > 1 else f"{Path(inpath).stem}.md"
    extract_session(inpath, outpath)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment