#!/usr/bin/env python3
"""Rebuild Claude Code's session index from the per-session JSONL files."""

from __future__ import annotations

import argparse
import datetime
import json
import textwrap
from pathlib import Path
from typing import Mapping, Optional, Sequence


def parse_timestamp(value: Optional[str]) -> Optional[datetime.datetime]:
    """Parse an ISO-8601 or epoch-seconds string into an aware UTC datetime, or return None."""
    if not value:
        return None
    value = value.strip()
    if not value:
        return None
    if value.endswith("Z"):
        # Older fromisoformat() implementations reject a trailing "Z"; rewrite it as an explicit offset.
        value = value[:-1] + "+00:00"
    try:
        dt = datetime.datetime.fromisoformat(value)
    except ValueError:
        try:
            dt = datetime.datetime.fromtimestamp(float(value), tz=datetime.timezone.utc)
        except Exception:
            return None
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=datetime.timezone.utc)
    return dt


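# Illustrative inputs for parse_timestamp() (values are made up, not taken from real session files):
#   parse_timestamp("2024-05-01T12:00:00Z") -> datetime(2024, 5, 1, 12, 0, tzinfo=timezone.utc)
#   parse_timestamp("0")                    -> datetime(1970, 1, 1, tzinfo=timezone.utc)  # epoch seconds
#   parse_timestamp("not a date")           -> None
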
def isoformat_utc(dt: Optional[datetime.datetime]) -> Optional[str]:
    """Render a datetime as a UTC ISO-8601 string with millisecond precision and a "Z" suffix."""
    if not dt:
        return None
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=datetime.timezone.utc)
    return dt.astimezone(datetime.timezone.utc).isoformat(timespec="milliseconds").replace(
        "+00:00", "Z"
    )


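# Illustrative round trip through the two helpers above (made-up value):
#   isoformat_utc(parse_timestamp("2024-05-01T12:00:00Z")) -> "2024-05-01T12:00:00.000Z"
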
def clean_text(raw: Optional[str], default: str) -> str:
    """Collapse whitespace and truncate to a short one-line string, falling back to *default*."""
    if not raw:
        return default
    normalized = " ".join(raw.split())
    if not normalized:
        return default
    return textwrap.shorten(normalized, width=220, placeholder="…")


def unpack_content(content) -> str:
    """Flatten a message content payload (string, list, or mapping) into plain text."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        return "".join(unpack_content(item) for item in content)
    if isinstance(content, Mapping):
        # Prefer the common text-bearing keys; otherwise dump the mapping as key:value pairs.
        for key in ("text", "content", "output"):
            if key in content:
                return unpack_content(content[key])
        return "".join(f"{key}:{value}" for key, value in content.items())
    return str(content)


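# Illustrative content shapes flattened by unpack_content(); the keys mirror what the function
# reads, and real messages may carry additional fields:
#   unpack_content("fix the build")                       -> "fix the build"
#   unpack_content([{"type": "text", "text": "fix it"}])  -> "fix it"
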
def extract_message_text(record: Mapping) -> str:
    """Return the flattened text of a record's message content."""
    message = record.get("message") or {}
    return unpack_content(message.get("content"))


def scan_jsonl(path: Path) -> Mapping[str, object]:
    """Scan one session JSONL file and collect the metadata needed for an index entry."""
    session_id: Optional[str] = None
    first_prompt: Optional[str] = None
    summary: Optional[str] = None
    message_count = 0
    earliest: Optional[datetime.datetime] = None
    latest: Optional[datetime.datetime] = None
    git_branch: Optional[str] = None
    project_path: Optional[str] = None
    is_sidechain = False

    with path.open(encoding="utf-8") as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue

            # Session-level fields: keep the first value seen for each.
            if session_id is None:
                session_id = record.get("sessionId")
            if project_path is None:
                project_path = record.get("cwd")
            if git_branch is None:
                git_branch = record.get("gitBranch")
            if record.get("isSidechain"):
                is_sidechain = True

            if record.get("type") in ("user", "assistant"):
                message_count += 1
            if record.get("type") == "user" and not first_prompt:
                first_prompt = extract_message_text(record)

            # A later summary record overrides an earlier one.
            if record.get("type") == "summary" and isinstance(record.get("summary"), str):
                summary = record["summary"]

            ts = record.get("timestamp")
            if not ts and record.get("snapshot") and isinstance(record["snapshot"], Mapping):
                ts = record["snapshot"].get("timestamp")

            dt = parse_timestamp(ts)
            if dt:
                if earliest is None or dt < earliest:
                    earliest = dt
                if latest is None or dt > latest:
                    latest = dt

    return {
        "session_id": session_id or path.stem,
        "first_prompt": clean_text(first_prompt, "No prompt"),
        "summary": clean_text(summary or "", ""),
        "message_count": message_count,
        "created": earliest,
        "modified": latest,
        "git_branch": git_branch or "",
        "project_path": project_path or "",
        "is_sidechain": is_sidechain,
    }


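# Illustrative shape of the per-line records scan_jsonl() reads. The field names mirror the keys
# accessed above; real Claude Code session records carry more fields than shown here.
#   {"type": "user", "sessionId": "abc123", "cwd": "/home/me/proj", "gitBranch": "main",
#    "isSidechain": false, "timestamp": "2024-05-01T12:00:00Z",
#    "message": {"role": "user", "content": [{"type": "text", "text": "fix the tests"}]}}
#   {"type": "summary", "summary": "Fixed failing unit tests"}
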
def rebuild_index(
    project_dir: Path, original_path: Path, dry_run: bool = False, backup: bool = False
) -> None:
    """Scan every *.jsonl session file in project_dir and write a fresh sessions-index.json."""
    project_dir = project_dir.expanduser().resolve()
    original_path = original_path.resolve()
    jsonl_files = sorted(project_dir.glob("*.jsonl"))
    entries = []

    for jsonl in jsonl_files:
        if jsonl.name == "sessions-index.json":
            continue
        meta = scan_jsonl(jsonl)
        stat = jsonl.stat()
        entry = {
            "sessionId": meta["session_id"],
            "fullPath": str(jsonl),
            "fileMtime": int(stat.st_mtime * 1000),
            "firstPrompt": meta["first_prompt"],
            "summary": meta["summary"],
            "messageCount": meta["message_count"],
            "created": isoformat_utc(meta["created"]) or "",
            "modified": isoformat_utc(meta["modified"]) or "",
            "gitBranch": meta["git_branch"],
            "projectPath": meta["project_path"] or str(original_path),
            "isSidechain": meta["is_sidechain"],
            # Temporary sort key: newest-modified sessions first; sessions without
            # any parseable timestamp sink to the bottom.
            "_sort": meta["modified"]
            or datetime.datetime.min.replace(tzinfo=datetime.timezone.utc),
        }
        entries.append(entry)

    entries.sort(key=lambda r: r["_sort"], reverse=True)
    for entry in entries:
        entry.pop("_sort", None)

    index = {
        "version": 1,
        "entries": entries,
        "originalPath": str(original_path),
    }

    index_path = project_dir / "sessions-index.json"
    if dry_run:
        print(f"[dry run] rebuilt {len(entries)} entries for {project_dir}")
        return

    if backup and index_path.exists():
        bak_path = index_path.with_name(index_path.name + ".bak")
        index_path.replace(bak_path)

    with index_path.open("w", encoding="utf-8") as fh:
        json.dump(index, fh, indent=2)
        fh.write("\n")
    print(f"Rebuilt sessions-index.json ({len(entries)} entries) at {index_path}")


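# The written sessions-index.json has this overall shape (entry values are illustrative):
#   {"version": 1,
#    "entries": [{"sessionId": "abc123", "fullPath": ".../abc123.jsonl", "fileMtime": 1714560000000,
#                 "firstPrompt": "fix the tests", "summary": "", "messageCount": 12,
#                 "created": "2024-05-01T12:00:00.000Z", "modified": "2024-05-01T12:30:00.000Z",
#                 "gitBranch": "main", "projectPath": "/home/me/proj", "isSidechain": false}],
#    "originalPath": "/home/me/proj"}
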
def main(argv: Optional[Sequence[str]] = None) -> None:
    """Parse command-line arguments and rebuild the session index."""
    parser = argparse.ArgumentParser(
        description="Rebuild the Claude Code sessions-index.json from raw JSONL files."
    )
    parser.add_argument(
        "project_dir",
        type=Path,
        nargs="?",
        default=Path("~/.claude/projects").expanduser(),
        help="Claude project directory (contains *.jsonl sessions)",
    )
    parser.add_argument(
        "--original-path",
        type=Path,
        default=Path.cwd(),
        help="Project path recorded as originalPath in the rebuilt index",
    )
    parser.add_argument("--dry-run", action="store_true", help="Scan files but do not write the index")
    parser.add_argument(
        "--backup",
        action="store_true",
        help="Move the existing sessions-index.json to sessions-index.json.bak before writing",
    )
    args = parser.parse_args(argv)
    rebuild_index(args.project_dir, args.original_path, dry_run=args.dry_run, backup=args.backup)


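# Example invocations (the script filename and paths below are placeholders, not fixed names):
#   python3 rebuild_sessions_index.py --dry-run
#   python3 rebuild_sessions_index.py ~/.claude/projects/my-project --original-path ~/src/my-project --backup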
if __name__ == "__main__":
    main()