izzygld · February 10, 2026 13:10
diff --git a/context.txt b/context.txt
 You are a context-gathering agent. Your job is to efficiently explore a codebase and produce a structured summary that other agents can use without re-exploring.

 ## Your Goals

 1. **Discover relevant files** - Find files related to the task
 2. **Identify patterns** - Note conventions, coding styles, and architectural patterns
 3. **Map dependencies** - Understand what the code depends on
 4. **Define scope** - Clearly state what's in scope and what's not

 ## Output Requirements

 You MUST output valid JSON matching this schema:

 ```json
 {
  "timestamp": "ISO datetime",
  "task_description": "The original task",
  "relevant_files": [
    {
      "path": "relative/path/to/file.py",
      "summary": "One-line description of what this file does",
      "relevance": "high|medium|low",
      "line_count": 123
    }
  ],
  "dependencies": [
    {
      "name": "package-name",
      "version": "1.2.3",
      "usage": "Used for X"
    }
  ],
  "existing_patterns": [
    "Uses dependency injection for services",
    "All API endpoints follow REST conventions"
  ],
  "constraints": [
    "Must support Python 3.9+",
    "Cannot modify the database schema"
  ],
  "files_in_scope": ["path/to/file1.py", "path/to/file2.py"],
  "files_out_of_scope": ["path/to/unrelated.py"]
 }
 ```

 ## Guidelines

 - Be thorough but efficient — don't read every file, use search and file structure
 - Prioritize files that will be modified
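 - Note any existing tests related to the task (list them in `relevant_files`)
 - Identify configuration files that might need changes
 - Flag any potential blockers or risks you discover by recording them in `constraints`, since the JSON is your only output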

 ## What NOT to do

 - Don't start implementing anything
 - Don't produce a plan (that's the next agent's job)
 - Don't output anything except the JSON
diff --git a/implement.txt b/implement.txt
 You are an implementation agent. Your job is to execute a specific task from the plan. You have been assigned ONE task and MUST stay within your assigned scope.

 ## Your Goals

 1. **Implement the assigned task** - Follow the plan exactly
 2. **Stay in scope** - Only modify/create files you're assigned
 3. **Meet acceptance criteria** - Your changes should satisfy all criteria
 4. **Report accurately** - Document exactly what you changed

 ## Output Requirements

 You MUST output valid JSON matching this schema:

 ```json
 {
  "task_id": "T1",
  "status": "completed|partial|blocked|failed",
  "changes": [
    {
      "path": "path/to/file.py",
      "change_type": "created|modified|deleted",
      "summary": "What was changed",
      "lines_added": 25,
      "lines_removed": 10
    }
  ],
  "notes": "Any relevant notes about the implementation",
  "blockers": []
 }
 ```

 ## Status Definitions

 - `completed`: All acceptance criteria met, ready for review
 - `partial`: Some criteria met, but not all (explain in notes)
 - `blocked`: Cannot proceed due to an external dependency or a required change outside your assigned files (list in blockers)
 - `failed`: Could not implement (explain in notes)

 ## Guidelines

 ### Scope Discipline
 - You may ONLY modify files listed in `files_to_modify`
 - You may ONLY create files listed in `files_to_create`
 - If you need to modify another file, mark status as `blocked` and explain

 ### Implementation Quality
 - Follow existing code patterns discovered in the context
 - Add appropriate comments for complex logic
 - Handle edge cases mentioned in acceptance criteria
 - Don't break existing functionality

 ### When You're Blocked
 - Don't try to work around scope limitations
 - Report the blocker clearly so the orchestrator can re-plan

 ## What NOT to do

 - Don't modify files outside your scope (other agents may be working on them)
 - Don't make changes beyond what the task requires
 - Don't refactor unrelated code
 - Don't add features not in the plan
diff --git a/orchestrator.md b/orchestrator.md
diff --git a/orchestrator.py b/orchestrator.py
 """
 Orchestrator: The "life coach" that routes between agents.

 This is intentionally thin — it only handles:
 1. Reading shared memory (JSON files)
 2. Deciding which phase to run next
 3. Dispatching to the appropriate agent
 4. Writing results back to shared memory

 No domain logic lives here. Each agent is self-contained.
 """

 import json
 import subprocess
 from pathlib import Path
 from datetime import datetime, timedelta
 from enum import Enum
 from typing import Any

 # In production, replace with your actual API client
 # from anthropic import Anthropic


 class Phase(Enum):
    # Workflow stages. The string values are what the CLI `--phase` option is
    # converted from and what load_prompt() uses as the prompt file stem.
    CONTEXT = "context"
    PLAN = "plan"
    IMPLEMENT = "implement"
    REVIEW = "review"
    DONE = "done"  # terminal marker: run() stops when determine_next_phase() returns it


 class ModelTier(Enum):
    # Model identifiers handed to call_agent(); each member's value is the model name string.
    # NOTE(review): BALANCED is not referenced by the phase handlers visible in this file.
    FAST = "claude-3-5-haiku-20241022"      # Context gathering, implementation
    BALANCED = "claude-sonnet-4-20250514"   # Implementation with more reasoning
    POWERFUL = "claude-opus-4-20250514"     # Planning, review


 # ─────────────────────────────────────────────────────────────────
 # Configuration
 # ─────────────────────────────────────────────────────────────────

 # Shared memory lives in a directory relative to the current working directory;
 # each phase persists its JSON output to its own file inside it.
 MEMORY_DIR = Path(".agent_memory")
 CONTEXT_FILE = MEMORY_DIR / "context.json"      # written by run_context_phase()
 PLAN_FILE = MEMORY_DIR / "plan.json"            # written by run_plan_phase()
 IMPL_FILE = MEMORY_DIR / "impl_report.json"     # written by run_implement_phase()
 REVIEW_FILE = MEMORY_DIR / "review.json"        # written by run_review_phase()

 CONTEXT_STALENESS_HOURS = 4  # Re-gather context if older than this


 # ─────────────────────────────────────────────────────────────────
 # Memory Management
 # ─────────────────────────────────────────────────────────────────

 def ensure_memory_dir():
    """Create the shared-memory directory; do nothing if it is already there."""
    memory_root = MEMORY_DIR
    memory_root.mkdir(exist_ok=True)


 def read_memory(file: Path) -> dict | None:
    if not file.exists():
        return None
    return json.loads(file.read_text())


 def write_memory(file: Path, data: dict):
    """
    Persist *data* as pretty-printed JSON at *file*.

    The directory that will actually contain *file* is created first
    (including missing parents), so the write works for any target path and
    not only for files directly inside the default memory directory.

    Args:
        file: Destination path.
        data: JSON-serialisable mapping; values json cannot encode natively
            (for example datetime objects) are converted with str().
    """
    file.parent.mkdir(parents=True, exist_ok=True)
    file.write_text(json.dumps(data, indent=2, default=str), encoding="utf-8")


 def is_stale(file: Path, hours: int = CONTEXT_STALENESS_HOURS) -> bool:
    """
    Report whether the memory file is missing, undated, or too old.

    Args:
        file: Memory file whose top-level "timestamp" field is inspected.
        hours: Maximum accepted age in hours.

    Returns:
        True when the file is absent, is not a JSON object, has no usable
        ISO-8601 "timestamp" string, or that timestamp is more than *hours*
        old; False otherwise.
    """
    data = read_memory(file)  # None when the file does not exist
    if not isinstance(data, dict):
        return True

    raw = data.get("timestamp")
    if not isinstance(raw, str):
        return True

    # Older fromisoformat() implementations reject the "Z" UTC suffix.
    if raw.endswith("Z"):
        raw = raw[:-1] + "+00:00"
    try:
        ts = datetime.fromisoformat(raw)
    except ValueError:
        # An unparseable timestamp cannot prove freshness.
        return True

    # Take "now" in the timestamp's own zone (naive when the timestamp is
    # naive) so the subtraction never mixes naive and aware datetimes.
    return datetime.now(tz=ts.tzinfo) - ts > timedelta(hours=hours)


 # ─────────────────────────────────────────────────────────────────
 # Agent Dispatch
 # ─────────────────────────────────────────────────────────────────

 def call_agent(
    model: ModelTier,
    system_prompt: str,
    user_message: str,
    tools: list[dict] | None = None
 ) -> str:
    """
    Send one request to an agent running on the given model tier.

    Currently a stub: it logs which model would be used together with the
    sizes of both prompts, then returns an empty JSON object as text. The
    `tools` argument is accepted but not used by the stub.
    """
    log_prefix = "[Orchestrator]"
    print(f"{log_prefix} Dispatching to {model.value}")
    print(f"{log_prefix} System prompt length: {len(system_prompt)} chars")
    print(f"{log_prefix} User message length: {len(user_message)} chars")

    # Sketch of the real call this stub stands in for:
    #   client = Anthropic()
    #   response = client.messages.create(
    #       model=model.value,
    #       max_tokens=8192,
    #       system=system_prompt,
    #       messages=[{"role": "user", "content": user_message}],
    #       tools=tools or []
    #   )
    #   return response.content[0].text

    stub_reply = "{}"
    return stub_reply


 def load_prompt(phase: Phase) -> str:
    """
    Load the system prompt for a phase.

    Looks for "prompts/<phase value>.txt" next to this module.

    Returns:
        The file's text, or a generic one-line prompt naming the phase when
        no such file exists.
    """
    prompt_file = Path(__file__).parent / "prompts" / f"{phase.value}.txt"
    try:
        # Decode explicitly as UTF-8: the prompts contain non-ASCII
        # punctuation that a platform default code page would garble.
        return prompt_file.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        return f"You are the {phase.value} agent."


 # ─────────────────────────────────────────────────────────────────
 # Phase Handlers
 # ─────────────────────────────────────────────────────────────────

 def run_context_phase(task: str) -> dict:
    """
    Phase 1: Gather context using a fast model.

    The context agent explores the codebase and produces a structured
    summary that subsequent agents can consume without re-exploring.

    Args:
        task: Description of the work, embedded in the agent request.

    Returns:
        The parsed context object, as written to the context memory file.

    Raises:
        json.JSONDecodeError: if the agent reply is not valid JSON.
        RuntimeError: if the reply is valid JSON but not an object.
    """
    system_prompt = load_prompt(Phase.CONTEXT)

    user_message = f"""
    Task: {task}
    
    Explore the codebase and produce a context.json with:
    - Relevant files and their summaries
    - Dependencies
    - Existing patterns/conventions
    - Technical constraints
    - Scope boundaries
    
    Output ONLY valid JSON matching the ContextOutput schema.
    """

    result = call_agent(
        model=ModelTier.FAST,
        system_prompt=system_prompt,
        user_message=user_message
    )

    data = json.loads(result)
    if not isinstance(data, dict):
        raise RuntimeError("Context agent did not return a JSON object")

    # The staleness check reads this field, so the orchestrator stamps it
    # itself instead of trusting the model to supply a correct, parseable
    # time. Without it the context is always stale and the main loop would
    # re-run this phase forever.
    data["timestamp"] = datetime.now().isoformat()
    data.setdefault("task_description", task)

    write_memory(CONTEXT_FILE, data)
    return data


 def run_plan_phase() -> dict:
    """
    Phase 2: turn the stored context into an implementation plan.

    Reads the context from shared memory, asks the most capable model tier
    for a plan, stores the parsed JSON reply in the plan file and then
    records a git checkpoint named "plan-approved".

    Raises:
        RuntimeError: if no context (or an empty one) is stored.
    """
    gathered = read_memory(CONTEXT_FILE)
    if not gathered:
        raise RuntimeError("No context found — run context phase first")

    planner_prompt = load_prompt(Phase.PLAN)
    context_json = json.dumps(gathered, indent=2)

    request = f"""
    Context:
    {context_json}
    
    Create a detailed implementation plan with:
    - Ordered tasks with clear boundaries
    - Acceptance criteria for each task
    - Parallel execution groups (tasks that can run simultaneously)
    - Risk assessment
    
    Output ONLY valid JSON matching the PlanOutput schema.
    """

    raw_reply = call_agent(
        model=ModelTier.POWERFUL,
        system_prompt=planner_prompt,
        user_message=request
    )
    plan = json.loads(raw_reply)
    write_memory(PLAN_FILE, plan)

    # Snapshot the working tree; the returned SHA is not needed here.
    git_checkpoint("plan-approved")

    return plan


 def run_implement_phase() -> list[dict]:
    """
    Phase 3: Execute the plan, one parallel group after another.

    Groups are processed in plan order and, for now, the tasks inside a
    group are dispatched sequentially. Each task gets its own agent call
    carrying the stored context, the task definition and its file scope.
    The collected results are written to the implementation report together
    with a timestamp and the SHA of an "implementation-complete" checkpoint.

    Returns:
        The parsed result object of every dispatched task, in dispatch order.

    Raises:
        RuntimeError: if no plan is stored, or if a parallel group names a
            task id that the plan's task list does not define. The check
            happens before any agent is dispatched.
    """
    plan = read_memory(PLAN_FILE)
    if not plan:
        raise RuntimeError("No plan found — run plan phase first")

    context = read_memory(CONTEXT_FILE)
    system_prompt = load_prompt(Phase.IMPLEMENT)

    # Index tasks once; setdefault keeps the first definition of a duplicated
    # id, matching a first-match scan of the list.
    tasks_by_id: dict = {}
    for candidate in plan.get("tasks", []):
        tasks_by_id.setdefault(candidate["id"], candidate)

    groups = plan.get("parallel_groups", [[]])

    # Validate every reference up front so a bad plan fails before any agent
    # has modified files, and with a message naming the offending id.
    for group_idx, task_ids in enumerate(groups):
        for task_id in task_ids:
            if task_id not in tasks_by_id:
                raise RuntimeError(
                    f"Plan group {group_idx + 1} references unknown task id {task_id!r}"
                )

    all_results = []

    # Process each parallel group
    for group_idx, task_ids in enumerate(groups):
        print(f"[Orchestrator] Running parallel group {group_idx + 1}: {task_ids}")

        # In production, these would run in parallel (asyncio, threads, etc.)
        group_results = []
        for task_id in task_ids:
            task = tasks_by_id[task_id]

            user_message = f"""
            Context:
            {json.dumps(context, indent=2)}
            
            Your assigned task:
            {json.dumps(task, indent=2)}
            
            Implement this task. You may ONLY modify these files:
            {task.get("files_to_modify", [])}
            
            You may create these new files:
            {task.get("files_to_create", [])}
            
            Output ONLY valid JSON matching the TaskResult schema.
            """

            result = call_agent(
                model=ModelTier.FAST,
                system_prompt=system_prompt,
                user_message=user_message
            )

            group_results.append(json.loads(result))

        all_results.extend(group_results)

    # Aggregate results
    impl_report = {
        "timestamp": datetime.now().isoformat(),
        "results": all_results,
        "git_commit_sha": git_checkpoint("implementation-complete")
    }
    write_memory(IMPL_FILE, impl_report)

    return all_results


 def run_review_phase() -> dict:
    """
    Phase 4: have a powerful model review the implementation.

    Loads the plan, the implementation report and the context from shared
    memory, sends all three to the review agent, and stores the parsed JSON
    verdict in the review file.

    Raises:
        RuntimeError: if any of the three earlier outputs is missing or empty.
    """
    approved_plan = read_memory(PLAN_FILE)
    report = read_memory(IMPL_FILE)
    gathered = read_memory(CONTEXT_FILE)

    if not (approved_plan and report and gathered):
        raise RuntimeError("Missing prior phase outputs")

    reviewer_prompt = load_prompt(Phase.REVIEW)

    context_json = json.dumps(gathered, indent=2)
    plan_json = json.dumps(approved_plan, indent=2)
    report_json = json.dumps(report, indent=2)

    request = f"""
    Original Context:
    {context_json}
    
    Approved Plan:
    {plan_json}
    
    Implementation Report:
    {report_json}
    
    Review all changes and check:
    1. Do the changes satisfy the acceptance criteria?
    2. Are there any bugs, edge cases, or security issues?
    3. Did the implementation drift from the plan?
    4. Is the code quality acceptable?
    
    Output ONLY valid JSON matching the ReviewOutput schema.
    """

    raw_reply = call_agent(
        model=ModelTier.POWERFUL,
        system_prompt=reviewer_prompt,
        user_message=request
    )
    verdict = json.loads(raw_reply)
    write_memory(REVIEW_FILE, verdict)

    return verdict


 # ─────────────────────────────────────────────────────────────────
 # Git Checkpoints
 # ─────────────────────────────────────────────────────────────────

 def git_checkpoint(name: str) -> str | None:
    """Create a git commit as a rollback checkpoint."""
    try:
        subprocess.run(["git", "add", "-A"], check=True, capture_output=True)
        subprocess.run(
            ["git", "commit", "-m", f"[agent-checkpoint] {name}"],
            check=True,
            capture_output=True
        )
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            check=True,
            capture_output=True,
            text=True
        )
        return result.stdout.strip()
    except subprocess.CalledProcessError:
        return None


 # ─────────────────────────────────────────────────────────────────
 # Main Orchestration Loop
 # ─────────────────────────────────────────────────────────────────

 def determine_next_phase() -> Phase:
    """
    Work out which phase should run next from the files in shared memory.

    Order of checks: stale or missing context, then the first of plan,
    implementation report and review that does not exist yet. When all are
    present, the review's "recommended_action" decides. Note the side
    effect: for "fix_and_re_review" and "re_plan" the outputs that must be
    regenerated are deleted here before the phase to resume is returned.
    Any other action, or an empty review, yields Phase.DONE.
    """
    if is_stale(CONTEXT_FILE):
        return Phase.CONTEXT

    # First missing artifact decides the phase.
    pipeline = (
        (PLAN_FILE, Phase.PLAN),
        (IMPL_FILE, Phase.IMPLEMENT),
        (REVIEW_FILE, Phase.REVIEW),
    )
    for artifact, producing_phase in pipeline:
        if not artifact.exists():
            return producing_phase

    review = read_memory(REVIEW_FILE)
    if not review:
        return Phase.DONE

    verdict = review.get("recommended_action")

    # (action, outputs to discard, phase to resume from)
    rewinds = (
        ("fix_and_re_review", (IMPL_FILE, REVIEW_FILE), Phase.IMPLEMENT),
        ("re_plan", (PLAN_FILE, IMPL_FILE, REVIEW_FILE), Phase.PLAN),
    )
    for action, outdated, resume_phase in rewinds:
        if verdict == action:
            for artifact in outdated:
                artifact.unlink(missing_ok=True)
            return resume_phase

    # "merge" and every unrecognised action end the workflow.
    return Phase.DONE


 def run(task: str | None = None, max_iterations: int = 25):
    """
    Main entry point for the orchestrator.

    Repeatedly asks determine_next_phase() what to do and runs that phase
    until the workflow reports Phase.DONE.

    Args:
        task: Task description; required whenever the context phase runs.
        max_iterations: Upper bound on the number of phases executed in one
            call. Review verdicts can send the workflow back to planning or
            implementation indefinitely; the bound turns an endless (and
            costly) loop into an explicit error.

    Raises:
        ValueError: if the context phase must run but no task was given.
        RuntimeError: if the workflow has not finished after
            *max_iterations* phases.
    """

    ensure_memory_dir()

    for _ in range(max_iterations):
        phase = determine_next_phase()
        print(f"\n{'='*60}")
        print(f"[Orchestrator] Current phase: {phase.value}")
        print(f"{'='*60}\n")

        if phase == Phase.DONE:
            # DONE is also what an escalation resolves to; do not announce
            # that as a successful completion.
            review = read_memory(REVIEW_FILE)
            escalated = (
                isinstance(review, dict)
                and review.get("recommended_action") == "escalate_to_human"
            )
            if escalated:
                print("[Orchestrator] Review escalated to a human; stopping without merge.")
            else:
                print("[Orchestrator] Workflow complete!")
            return

        if phase == Phase.CONTEXT:
            if not task:
                raise ValueError("Task required for context phase")
            run_context_phase(task)

        elif phase == Phase.PLAN:
            run_plan_phase()
            # Optional: pause here for human approval
            # input("Press Enter to approve the plan and continue...")

        elif phase == Phase.IMPLEMENT:
            run_implement_phase()

        elif phase == Phase.REVIEW:
            run_review_phase()

    raise RuntimeError(
        f"Workflow did not reach completion within {max_iterations} phases"
    )


 # ─────────────────────────────────────────────────────────────────
 # CLI
 # ─────────────────────────────────────────────────────────────────

 if __name__ == "__main__":
    # Command-line front end: run the whole workflow ("auto") or one phase.
    import argparse
    import shutil

    parser = argparse.ArgumentParser(description="Multi-agent orchestrator")
    parser.add_argument("--task", help="Task description for the agents")
    parser.add_argument(
        "--phase",
        choices=["context", "plan", "implement", "review", "auto"],
        default="auto",
        help="Run a specific phase or auto-detect"
    )
    parser.add_argument(
        "--reset",
        action="store_true",
        help="Clear all memory and start fresh"
    )

    args = parser.parse_args()

    # Reject an unusable invocation before anything on disk is touched: the
    # context agent cannot do useful work with an empty task, and run()
    # refuses that case as well.
    if args.phase == "context" and not args.task:
        parser.error("--task is required when --phase is 'context'")

    if args.reset:
        shutil.rmtree(MEMORY_DIR, ignore_errors=True)
        print("[Orchestrator] Memory cleared")

    if args.phase == "auto":
        run(args.task)
    else:
        phase_handlers = {
            Phase.CONTEXT: lambda: run_context_phase(args.task),
            Phase.PLAN: run_plan_phase,
            Phase.IMPLEMENT: run_implement_phase,
            Phase.REVIEW: run_review_phase,
        }
        phase_handlers[Phase(args.phase)]()
diff --git a/plan.txt b/plan.txt
 You are a planning agent. Your job is to take the context gathered by the previous agent and produce a detailed, actionable implementation plan.

 ## Your Goals

 1. **Break down the task** - Create discrete, testable tasks
 2. **Define boundaries** - Each task should have clear file ownership (no conflicts)
 3. **Enable parallelism** - Group tasks that can run simultaneously
 4. **Set acceptance criteria** - Define what "done" means for each task

 ## Output Requirements

 You MUST output valid JSON matching this schema:

 ```json
 {
  "timestamp": "ISO datetime",
  "plan_version": 1,
  "approach_summary": "High-level description of the implementation approach",
  "risks": [
    "Risk 1: Description and mitigation",
    "Risk 2: Description and mitigation"
  ],
  "tasks": [
    {
      "id": "T1",
      "title": "Short task title",
      "description": "Detailed description of what to do",
      "files_to_modify": ["path/to/file.py"],
      "files_to_create": ["path/to/new_file.py"],
      "depends_on": [],
      "acceptance_criteria": [
        "Criterion 1",
        "Criterion 2"
      ],
      "estimated_complexity": "trivial|simple|moderate|complex"
    }
  ],
  "parallel_groups": [
    ["T1", "T2"],
    ["T3"],
    ["T4", "T5", "T6"]
  ]
 }
 ```

 ## Guidelines

 ### Task Boundaries
 - Each task should own specific files — no two parallel tasks should modify the same file
 - If two tasks must modify the same file, they cannot be in the same parallel group

 ### Dependencies
 - Use `depends_on` to express ordering requirements
 - Tasks in later parallel groups implicitly depend on all earlier groups

 ### Complexity Estimation
 - `trivial`: < 10 lines, obvious change
 - `simple`: 10-50 lines, straightforward logic
 - `moderate`: 50-200 lines, requires some reasoning
 - `complex`: 200+ lines or significant architectural decisions

 ### Acceptance Criteria
 - Be specific and testable
 - Include edge cases that should be handled
 - Reference existing tests if applicable

 ## What NOT to do

 - Don't implement anything
 - Don't be vague ("improve the code")
 - Don't create tasks without file ownership
 - Don't create overlapping file scopes in the same parallel group
diff --git a/review.txt b/review.txt
 You are a code review agent. Your job is to review all changes made by the implementation agents and ensure quality, correctness, and adherence to the plan.

 ## Your Goals

 1. **Verify correctness** - Check for bugs, edge cases, security issues
 2. **Check plan adherence** - Did the implementation follow the plan?
 3. **Assess quality** - Is the code maintainable and idiomatic?
 4. **Provide actionable feedback** - If issues exist, be specific

 ## Output Requirements

 You MUST output valid JSON matching this schema:

 ```json
 {
  "timestamp": "ISO datetime",
  "verdict": "approved|needs_changes|rejected",
  "summary": "One paragraph summary of the review",
  "issues": [
    {
      "severity": "critical|major|minor|suggestion",
      "file": "path/to/file.py",
      "line": 42,
      "description": "What's wrong",
      "suggested_fix": "How to fix it",
      "relates_to_task": "T1"
    }
  ],
  "plan_drift_detected": false,
  "drift_details": null,
  "recommended_action": "merge|fix_and_re_review|re_plan|escalate_to_human"
 }
 ```

 ## Severity Definitions

 - `critical`: Security vulnerability, data loss risk, or complete breakage
 - `major`: Significant bug or missing functionality from acceptance criteria
 - `minor`: Small bug or quality issue that should be fixed
 - `suggestion`: Nice-to-have improvement, not blocking

 ## Verdict Definitions

 - `approved`: No critical/major issues, ready to merge
 - `needs_changes`: Has issues that must be fixed before merging
 - `rejected`: Fundamental problems require re-planning

 ## Recommended Actions

 - `merge`: All good, proceed
 - `fix_and_re_review`: Send back to implementation agents with your feedback
 - `re_plan`: Changes are so off-track that the plan needs revision
 - `escalate_to_human`: Requires human judgment (security, architecture, etc.)

 ## What to Check

 ### Correctness
 - Does each task meet its acceptance criteria?
 - Are edge cases handled?
 - Are there any obvious bugs?

 ### Plan Adherence
 - Did agents stay within their file scopes?
 - Were any unauthorized changes made?
 - Do the changes match the task descriptions?

 ### Quality
 - Does the code follow existing patterns from the context?
 - Is the code readable and maintainable?
 - Are there appropriate tests?

 ### Security
 - Any hardcoded secrets?
 - SQL injection, XSS, or other vulnerabilities?
 - Proper input validation?

 ## Guidelines

 - Be thorough but fair — don't nitpick on style if functionality is correct
 - Always provide a suggested fix for issues you raise
 - Consider the broader context, not just individual changes
 - If parallel agents made conflicting changes, flag this as critical
diff --git a/schemas.py b/schemas.py
 """
 Pydantic schemas defining the contracts between agents.
 Each agent reads and writes structured data — no ambiguity.
 """

 from pydantic import BaseModel, Field
 from typing import Literal
 from datetime import datetime


 # ─────────────────────────────────────────────────────────────────
 # Phase 1: Context Agent Output
 # ─────────────────────────────────────────────────────────────────

 # One entry of ContextOutput.relevant_files: a file the context agent reported.
 class FileContext(BaseModel):
    path: str  # file path as reported by the agent
    summary: str  # short description of what the file does
    relevance: Literal["high", "medium", "low"]  # only these three levels validate
    line_count: int  # line count as reported by the agent


 # One entry of ContextOutput.dependencies: a package the code relies on.
 class DependencyInfo(BaseModel):
    name: str  # package name
    # Explicit None default: without it a nullable annotation is still a
    # required field under pydantic v2, so a dependency whose version the
    # agent could not determine (key omitted) would fail validation.
    version: str | None = None
    usage: str  # what the package is used for


 class ContextOutput(BaseModel):
    """Output from the context-gathering agent."""
    
    # Defaults to the (naive, local) time at which the model instance is created.
    timestamp: datetime = Field(default_factory=datetime.now)
    task_description: str
    
    # Discovered context
    relevant_files: list[FileContext]
    dependencies: list[DependencyInfo]
    existing_patterns: list[str] = Field(
        description="Patterns/conventions already in the codebase"
    )
    constraints: list[str] = Field(
        description="Technical constraints discovered (e.g., 'must support Python 3.9+')"
    )
    
    # Scope boundaries
    files_in_scope: list[str]
    files_out_of_scope: list[str]


 # ─────────────────────────────────────────────────────────────────
 # Phase 2: Planner Agent Output
 # ─────────────────────────────────────────────────────────────────

 # One unit of work in a plan; the ids are what PlanOutput.parallel_groups refers to.
 class PlanTask(BaseModel):
    id: str = Field(description="Unique task ID like 'T1', 'T2'")
    title: str
    description: str
    files_to_modify: list[str]  # required, unlike files_to_create which defaults to empty
    files_to_create: list[str] = []
    depends_on: list[str] = Field(
        default=[],
        description="Task IDs this depends on — for ordering parallel work"
    )
    acceptance_criteria: list[str]
    estimated_complexity: Literal["trivial", "simple", "moderate", "complex"]


 class PlanOutput(BaseModel):
    """Output from the planning agent."""
    
    # Defaults to the (naive, local) time at which the model instance is created.
    timestamp: datetime = Field(default_factory=datetime.now)
    plan_version: int = 1
    
    # High-level strategy
    approach_summary: str
    risks: list[str]
    
    # Ordered tasks
    tasks: list[PlanTask]
    
    # Parallelization hints
    # Each inner list is one group of task ids; nothing in this model checks
    # that the ids actually appear in `tasks`.
    parallel_groups: list[list[str]] = Field(
        description="Groups of task IDs that can run in parallel"
    )


 # ─────────────────────────────────────────────────────────────────
 # Phase 3: Implementation Agent Output
 # ─────────────────────────────────────────────────────────────────

 # One file touched by an implementation agent, as listed in TaskResult.changes.
 class FileChange(BaseModel):
    path: str
    change_type: Literal["created", "modified", "deleted"]
    summary: str  # description of what was changed
    lines_added: int = 0  # both counters default to 0 when the agent omits them
    lines_removed: int = 0


 # Report for a single plan task, produced by the agent that worked on it.
 class TaskResult(BaseModel):
    task_id: str  # presumably the PlanTask.id this result answers; not validated here
    status: Literal["completed", "partial", "blocked", "failed"]
    changes: list[FileChange]
    notes: str = ""  # free-form notes; empty by default
    blockers: list[str] = []  # empty by default


 class ImplOutput(BaseModel):
    """Output from an implementation agent."""
    
    # Defaults to the (naive, local) time at which the model instance is created.
    timestamp: datetime = Field(default_factory=datetime.now)
    # Required field with no default.
    # NOTE(review): the report assembled by orchestrator.run_implement_phase()
    # contains timestamp, results and git_commit_sha but no agent_id; confirm
    # whether that report is meant to validate against this model.
    agent_id: str = Field(description="Which parallel agent ran this")
    
    results: list[TaskResult]
    git_commit_sha: str | None = None


 # ─────────────────────────────────────────────────────────────────
 # Phase 4: Review Agent Output
 # ─────────────────────────────────────────────────────────────────

 # One finding raised by the review agent, listed in ReviewOutput.issues.
 class ReviewIssue(BaseModel):
    severity: Literal["critical", "major", "minor", "suggestion"]
    file: str
    line: int | None = None
    description: str
    suggested_fix: str | None = None
    # Explicit None default, matching `line` and `suggested_fix`: a Field()
    # without a default is required under pydantic v2 even when the
    # annotation allows None, so an issue tied to no task would be rejected.
    relates_to_task: str | None = Field(
        default=None,
        description="Task ID if this issue relates to plan drift"
    )


 class ReviewOutput(BaseModel):
    """Output from the review agent."""
    
    # Defaults to the (naive, local) time at which the model instance is created.
    timestamp: datetime = Field(default_factory=datetime.now)
    
    # Overall assessment
    verdict: Literal["approved", "needs_changes", "rejected"]
    summary: str
    
    # Detailed issues
    issues: list[ReviewIssue]
    
    # Plan adherence check
    plan_drift_detected: bool
    drift_details: str | None = None
    
    # What to do next
    # orchestrator.determine_next_phase() branches on "merge",
    # "fix_and_re_review" and "re_plan"; any other value, including
    # "escalate_to_human", makes it return the DONE phase.
    recommended_action: Literal[
        "merge",
        "fix_and_re_review",
        "re_plan",
        "escalate_to_human"
    ]
	You are an implementation agent. Your job is to execute a specific task from the plan. You have been assigned ONE task and MUST stay within your assigned scope.

	## Your Goals

	1. Implement the assigned task - Follow the plan exactly
	2. Stay in scope - Only modify/create files you're assigned
	3. Meet acceptance criteria - Your changes should satisfy all criteria
	4. Report accurately - Document exactly what you changed

	## Output Requirements

	You MUST output valid JSON matching this schema:

	```json
	{
	"task_id": "T1",
	"status": "completed|partial|blocked|failed",
	"changes": [
	{
	"path": "path/to/file.py",
	"change_type": "created|modified|deleted",
	"summary": "What was changed",
	"lines_added": 25,
	"lines_removed": 10
	}
	],
	"notes": "Any relevant notes about the implementation",
	"blockers": []
	}
	```

	## Status Definitions

	- `completed`: All acceptance criteria met, ready for review
	- `partial`: Some criteria met, but not all (explain in notes)
	- `blocked`: Cannot proceed due to external dependency (list in blockers)
	- `failed`: Could not implement (explain in notes)

	## Guidelines

	### Scope Discipline
	- You may ONLY modify files listed in `files_to_modify`
	- You may ONLY create files listed in `files_to_create`
	- If you need to modify another file, mark status as `blocked` and explain

	### Implementation Quality
	- Follow existing code patterns discovered in the context
	- Add appropriate comments for complex logic
	- Handle edge cases mentioned in acceptance criteria
	- Don't break existing functionality

	### When You're Blocked
	- Don't try to work around scope limitations
	- Report the blocker clearly so the orchestrator can re-plan

	## What NOT to do

	- Don't modify files outside your scope (other agents may be working on them)
	- Don't make changes beyond what the task requires
	- Don't refactor unrelated code
	- Don't add features not in the plan
File	Purpose
`orchestrator.py`	Main router that dispatches phases
`schemas.py`	Pydantic schemas for inter-agent contracts
`agents/context_agent.py`	Fast context gathering
`agents/planner_agent.py`	Strategic planning
`agents/impl_agent.py`	Parallel implementation
`agents/review_agent.py`	Final code review
`prompts/`	System prompts for each agent
	"""
	Orchestrator: The "life coach" that routes between agents.

	This is intentionally thin — it only handles:
	1. Reading shared memory (JSON files)
	2. Deciding which phase to run next
	3. Dispatching to the appropriate agent
	4. Writing results back to shared memory

	No domain logic lives here. Each agent is self-contained.
	"""

	import json
	import subprocess
	from pathlib import Path
	from datetime import datetime, timedelta
	from enum import Enum
	from typing import Any

	# In production, replace with your actual API client
	# from anthropic import Anthropic


	class Phase(Enum):
	    # Workflow stages; the string values double as prompt-file stems in
	    # load_prompt(). Member indentation restored so the class body parses.
	    CONTEXT = "context"
	    PLAN = "plan"
	    IMPLEMENT = "implement"
	    REVIEW = "review"
	    DONE = "done"


	class ModelTier(Enum):
	    # Model identifiers handed to call_agent(); member indentation restored
	    # so the class body parses.
	    FAST = "claude-3-5-haiku-20241022"  # Context gathering, implementation
	    BALANCED = "claude-sonnet-4-20250514"  # Implementation with more reasoning
	    POWERFUL = "claude-opus-4-20250514"  # Planning, review


	# ─────────────────────────────────────────────────────────────────
	# Configuration
	# ─────────────────────────────────────────────────────────────────

	# Shared memory lives in a directory relative to the current working
	# directory; each phase keeps its JSON output in its own file inside it.
	MEMORY_DIR = Path(".agent_memory")
	CONTEXT_FILE = MEMORY_DIR / "context.json"
	PLAN_FILE = MEMORY_DIR / "plan.json"
	IMPL_FILE = MEMORY_DIR / "impl_report.json"
	REVIEW_FILE = MEMORY_DIR / "review.json"

	CONTEXT_STALENESS_HOURS = 4 # Re-gather context if older than this


	# ─────────────────────────────────────────────────────────────────
	# Memory Management
	# ─────────────────────────────────────────────────────────────────

	def ensure_memory_dir():
	    """Create the shared-memory directory; do nothing if it already exists."""
	    MEMORY_DIR.mkdir(exist_ok=True)


	def read_memory(file: Path) -> dict \| None:
	if not file.exists():
	return None
	return json.loads(file.read_text())


	def write_memory(file: Path, data: dict):
	    """
	    Persist *data* as pretty-printed JSON at *file*.

	    The directory that will contain *file* is created first (including
	    missing parents). Values json cannot encode natively are converted
	    with str().
	    """
	    file.parent.mkdir(parents=True, exist_ok=True)
	    file.write_text(json.dumps(data, indent=2, default=str), encoding="utf-8")


	def is_stale(file: Path, hours: int = CONTEXT_STALENESS_HOURS) -> bool:
	    """
	    Report whether the memory file is missing, undated, or too old.

	    Returns True when the file is absent, is not a JSON object, has no
	    usable ISO-8601 "timestamp" string, or that timestamp is more than
	    *hours* old; False otherwise.
	    """
	    data = read_memory(file)  # None when the file does not exist
	    if not isinstance(data, dict):
	        return True

	    raw = data.get("timestamp")
	    if not isinstance(raw, str):
	        return True

	    # Older fromisoformat() implementations reject the "Z" UTC suffix.
	    if raw.endswith("Z"):
	        raw = raw[:-1] + "+00:00"
	    try:
	        ts = datetime.fromisoformat(raw)
	    except ValueError:
	        return True

	    # Take "now" in the timestamp's own zone so the subtraction never
	    # mixes naive and aware datetimes.
	    return datetime.now(tz=ts.tzinfo) - ts > timedelta(hours=hours)


	# ─────────────────────────────────────────────────────────────────
	# Agent Dispatch
	# ─────────────────────────────────────────────────────────────────

	def call_agent(
	    model: ModelTier,
	    system_prompt: str,
	    user_message: str,
	    tools: list[dict] | None = None
	) -> str:
	    """
	    Call an AI agent with the specified model.

	    Currently a stub: it logs the model name and the sizes of both prompts
	    and returns an empty JSON object as text. `tools` is accepted but not
	    used by the stub.
	    """
	    print(f"[Orchestrator] Dispatching to {model.value}")
	    print(f"[Orchestrator] System prompt length: {len(system_prompt)} chars")
	    print(f"[Orchestrator] User message length: {len(user_message)} chars")

	    # Pseudocode for actual API call:
	    # client = Anthropic()
	    # response = client.messages.create(
	    #     model=model.value,
	    #     max_tokens=8192,
	    #     system=system_prompt,
	    #     messages=[{"role": "user", "content": user_message}],
	    #     tools=tools or []
	    # )
	    # return response.content[0].text

	    return "{}"  # Stub


	def load_prompt(phase: Phase) -> str:
	    """
	    Load the system prompt for a phase.

	    Reads "prompts/<phase value>.txt" next to this module and falls back to
	    a generic one-line prompt naming the phase when no such file exists.
	    """
	    prompt_file = Path(__file__).parent / "prompts" / f"{phase.value}.txt"
	    try:
	        # Decode explicitly as UTF-8 rather than the platform default.
	        return prompt_file.read_text(encoding="utf-8")
	    except (FileNotFoundError, NotADirectoryError):
	        return f"You are the {phase.value} agent."


	# ─────────────────────────────────────────────────────────────────
	# Phase Handlers
	# ─────────────────────────────────────────────────────────────────

	def run_context_phase(task: str) -> dict:
	    """
	    Phase 1: Gather context using a fast model.

	    The context agent explores the codebase and produces a structured
	    summary that subsequent agents can consume without re-exploring.

	    Args:
	        task: Description of the work the agents should carry out.

	    Returns:
	        The context object that was written to ``CONTEXT_FILE``.

	    Raises:
	        ValueError: If the agent's reply is not a JSON object
	            (``json.JSONDecodeError`` for invalid JSON is a subclass).
	    """
	    system_prompt = load_prompt(Phase.CONTEXT)

	    user_message = f"""
	Task: {task}

	Explore the codebase and produce a context.json with:
	- Relevant files and their summaries
	- Dependencies
	- Existing patterns/conventions
	- Technical constraints
	- Scope boundaries

	Output ONLY valid JSON matching the ContextOutput schema.
	"""

	    result = call_agent(
	        model=ModelTier.FAST,
	        system_prompt=system_prompt,
	        user_message=user_message,
	    )

	    data = json.loads(result)
	    if not isinstance(data, dict):
	        raise ValueError(
	            f"Context agent must return a JSON object, got {type(data).__name__}"
	        )

	    # The staleness check reads this timestamp, so it has to come from the
	    # orchestrator's clock: a model cannot know the current time, and a
	    # reply without the key would be treated as stale on every pass.
	    data["timestamp"] = datetime.now().isoformat()
	    # Later phases only see the stored context, so make sure the task
	    # itself is recorded even when the agent leaves it out.
	    data.setdefault("task_description", task)

	    write_memory(CONTEXT_FILE, data)
	    return data


	def run_plan_phase() -> dict:
	    """
	    Phase 2: turn the stored context into an implementation plan.

	    Reads the context written by phase 1, asks the powerful model tier for
	    a plan, stores the reply in ``PLAN_FILE`` and records a git checkpoint.

	    Returns:
	        The plan object that was written to ``PLAN_FILE``.

	    Raises:
	        RuntimeError: If no (non-empty) context has been stored yet.
	    """
	    context = read_memory(CONTEXT_FILE)
	    if not context:
	        raise RuntimeError("No context found — run context phase first")

	    planner_prompt = load_prompt(Phase.PLAN)
	    context_json = json.dumps(context, indent=2)

	    user_message = f"""
	Context:
	{context_json}

	Create a detailed implementation plan with:
	- Ordered tasks with clear boundaries
	- Acceptance criteria for each task
	- Parallel execution groups (tasks that can run simultaneously)
	- Risk assessment

	Output ONLY valid JSON matching the PlanOutput schema.
	"""

	    raw_plan = call_agent(
	        model=ModelTier.POWERFUL,
	        system_prompt=planner_prompt,
	        user_message=user_message,
	    )
	    plan = json.loads(raw_plan)
	    write_memory(PLAN_FILE, plan)

	    # Checkpoint so the plan can be rolled back to; the SHA is not needed here.
	    git_checkpoint("plan-approved")

	    return plan


	def run_implement_phase() -> list[dict]:
	    """
	    Phase 3: Execute the plan using fast models, one group at a time.

	    Groups are processed in plan order. Within a group the tasks are meant
	    to have non-overlapping file scopes so they could run simultaneously;
	    at present they are dispatched sequentially.

	    Returns:
	        The decoded result of every task, in execution order. The same
	        list is stored in ``IMPL_FILE`` together with a timestamp and the
	        checkpoint commit SHA.

	    Raises:
	        RuntimeError: If no (non-empty) plan has been stored yet.
	        ValueError: If a parallel group names a task id the plan does not
	            define.
	    """
	    plan = read_memory(PLAN_FILE)
	    if not plan:
	        raise RuntimeError("No plan found — run plan phase first")

	    context = read_memory(CONTEXT_FILE)
	    system_prompt = load_prompt(Phase.IMPLEMENT)

	    # Index the tasks once: O(1) lookups, and an unknown id can be reported
	    # by name. setdefault keeps the first task when ids are duplicated.
	    tasks_by_id: dict = {}
	    for planned in plan.get("tasks", []):
	        tasks_by_id.setdefault(planned["id"], planned)

	    # A plan without explicit groups must still be executed: fall back to
	    # one task per group, in plan order, which assumes no parallel safety.
	    groups = plan.get("parallel_groups") or [[tid] for tid in tasks_by_id]

	    all_results = []

	    for group_idx, task_ids in enumerate(groups):
	        print(f"[Orchestrator] Running parallel group {group_idx + 1}: {task_ids}")

	        # In production, these would run in parallel (asyncio, threads, etc.)
	        for task_id in task_ids:
	            if task_id not in tasks_by_id:
	                raise ValueError(
	                    f"Parallel group {group_idx + 1} references unknown task id {task_id!r}"
	                )
	            task = tasks_by_id[task_id]

	            user_message = f"""
	Context:
	{json.dumps(context, indent=2)}

	Your assigned task:
	{json.dumps(task, indent=2)}

	Implement this task. You may ONLY modify these files:
	{task.get("files_to_modify", [])}

	You may create these new files:
	{task.get("files_to_create", [])}

	Output ONLY valid JSON matching the TaskResult schema.
	"""

	            result = call_agent(
	                model=ModelTier.FAST,
	                system_prompt=system_prompt,
	                user_message=user_message,
	            )
	            all_results.append(json.loads(result))

	    # Aggregate results
	    impl_report = {
	        "timestamp": datetime.now().isoformat(),
	        "results": all_results,
	        "git_commit_sha": git_checkpoint("implementation-complete"),
	    }
	    write_memory(IMPL_FILE, impl_report)

	    return all_results


	def run_review_phase() -> dict:
	    """
	    Phase 4: have the powerful model tier review the implementation.

	    The reviewer receives the stored context, plan and implementation
	    report and is asked about:
	    - acceptance criteria, bugs, edge cases and security issues
	    - drift from the approved plan
	    - code quality

	    Returns:
	        The review object that was written to ``REVIEW_FILE``.

	    Raises:
	        RuntimeError: If the plan, implementation report or context is
	            missing or empty.
	    """
	    plan = read_memory(PLAN_FILE)
	    impl_report = read_memory(IMPL_FILE)
	    context = read_memory(CONTEXT_FILE)

	    if not (plan and impl_report and context):
	        raise RuntimeError("Missing prior phase outputs")

	    reviewer_prompt = load_prompt(Phase.REVIEW)

	    context_json = json.dumps(context, indent=2)
	    plan_json = json.dumps(plan, indent=2)
	    report_json = json.dumps(impl_report, indent=2)

	    user_message = f"""
	Original Context:
	{context_json}

	Approved Plan:
	{plan_json}

	Implementation Report:
	{report_json}

	Review all changes and check:
	1. Do the changes satisfy the acceptance criteria?
	2. Are there any bugs, edge cases, or security issues?
	3. Did the implementation drift from the plan?
	4. Is the code quality acceptable?

	Output ONLY valid JSON matching the ReviewOutput schema.
	"""

	    raw_review = call_agent(
	        model=ModelTier.POWERFUL,
	        system_prompt=reviewer_prompt,
	        user_message=user_message,
	    )
	    review = json.loads(raw_review)
	    write_memory(REVIEW_FILE, review)

	    return review


	# ─────────────────────────────────────────────────────────────────
	# Git Checkpoints
	# ─────────────────────────────────────────────────────────────────

	def git_checkpoint(name: str) -> str \| None:
	"""Create a git commit as a rollback checkpoint."""
	try:
	subprocess.run(["git", "add", "-A"], check=True, capture_output=True)
	subprocess.run(
	["git", "commit", "-m", f"[agent-checkpoint] {name}"],
	check=True,
	capture_output=True
	)
	result = subprocess.run(
	["git", "rev-parse", "HEAD"],
	check=True,
	capture_output=True,
	text=True
	)
	return result.stdout.strip()
	except subprocess.CalledProcessError:
	return None


	# ─────────────────────────────────────────────────────────────────
	# Main Orchestration Loop
	# ─────────────────────────────────────────────────────────────────

	def determine_next_phase() -> Phase:
	    """Decide which phase to run based on the files in memory.

	    The first missing artefact (stale context, plan, implementation
	    report, review) selects the phase. Once a review exists its
	    ``recommended_action`` decides:

	    - ``"merge"``: the workflow is done.
	    - ``"fix_and_re_review"``: the implementation report and review are
	      deleted and implementation runs again.
	    - ``"re_plan"``: plan, implementation report and review are deleted
	      and planning runs again.
	    - anything else (including ``"escalate_to_human"``): nothing can be
	      done automatically, so the workflow stops and says so.

	    Note that this function deletes memory files as a side effect of the
	    two retry actions.

	    Returns:
	        The phase to execute next, or ``Phase.DONE`` to stop.
	    """
	    # No context or stale? Start fresh.
	    if is_stale(CONTEXT_FILE):
	        return Phase.CONTEXT
	    if not PLAN_FILE.exists():
	        return Phase.PLAN
	    if not IMPL_FILE.exists():
	        return Phase.IMPLEMENT
	    if not REVIEW_FILE.exists():
	        return Phase.REVIEW

	    review = read_memory(REVIEW_FILE)
	    action = review.get("recommended_action") if review else None

	    if action == "fix_and_re_review":
	        # Clear impl and review, re-run from implementation
	        IMPL_FILE.unlink(missing_ok=True)
	        REVIEW_FILE.unlink(missing_ok=True)
	        return Phase.IMPLEMENT

	    if action == "re_plan":
	        # Clear everything after context
	        PLAN_FILE.unlink(missing_ok=True)
	        IMPL_FILE.unlink(missing_ok=True)
	        REVIEW_FILE.unlink(missing_ok=True)
	        return Phase.PLAN

	    if action != "merge":
	        # Stopping is the only safe option here, but it must not look like
	        # a clean approval to whoever reads the log.
	        print(
	            "[Orchestrator] Review did not recommend an automatic next step "
	            f"(recommended_action={action!r}); stopping for human attention"
	        )

	    return Phase.DONE


	def run(task: str | None = None, max_iterations: int = 20):
	    """Main entry point for the orchestrator.

	    Repeatedly asks ``determine_next_phase`` what to do and runs that
	    phase until the workflow reports ``Phase.DONE``.

	    Args:
	        task: Task description; only needed when the context phase runs.
	        max_iterations: Upper bound on the number of phases executed in
	            one call. Review verdicts can send the workflow back to
	            implementation or planning indefinitely, so the loop needs a
	            limit.

	    Raises:
	        ValueError: If the context phase is due but no task was given.
	        RuntimeError: If the workflow is not done after ``max_iterations``
	            phases.
	    """
	    ensure_memory_dir()

	    for _ in range(max_iterations):
	        phase = determine_next_phase()
	        print(f"\n{'='*60}")
	        print(f"[Orchestrator] Current phase: {phase.value}")
	        print(f"{'='*60}\n")

	        if phase == Phase.DONE:
	            print("[Orchestrator] Workflow complete!")
	            return

	        if phase == Phase.CONTEXT:
	            if not task:
	                raise ValueError("Task required for context phase")
	            run_context_phase(task)

	        elif phase == Phase.PLAN:
	            run_plan_phase()
	            # Optional: pause here for human approval
	            # input("Press Enter to approve the plan and continue...")

	        elif phase == Phase.IMPLEMENT:
	            run_implement_phase()

	        elif phase == Phase.REVIEW:
	            run_review_phase()

	    raise RuntimeError(
	        f"Workflow did not finish within {max_iterations} phase runs; "
	        "inspect the memory files before retrying"
	    )


	# ─────────────────────────────────────────────────────────────────
	# CLI
	# ─────────────────────────────────────────────────────────────────

	if __name__ == "__main__":
	    # Command-line front end: run the whole workflow ("auto") or a single
	    # phase, optionally wiping the stored memory first.
	    import argparse

	    parser = argparse.ArgumentParser(description="Multi-agent orchestrator")
	    parser.add_argument("--task", help="Task description for the agents")
	    parser.add_argument(
	        "--phase",
	        choices=["context", "plan", "implement", "review", "auto"],
	        default="auto",
	        help="Run a specific phase or auto-detect"
	    )
	    parser.add_argument(
	        "--reset",
	        action="store_true",
	        help="Clear all memory and start fresh"
	    )

	    args = parser.parse_args()

	    # Validate before touching anything: an explicit context phase without
	    # a task would otherwise send an empty task to the agent, whereas
	    # run() rejects that case.
	    if args.phase == "context" and not args.task:
	        parser.error("--task is required for the context phase")

	    if args.reset:
	        import shutil
	        shutil.rmtree(MEMORY_DIR, ignore_errors=True)
	        print("[Orchestrator] Memory cleared")

	    if args.phase == "auto":
	        run(args.task)
	    else:
	        phase = Phase(args.phase)
	        if phase == Phase.CONTEXT:
	            run_context_phase(args.task)
	        elif phase == Phase.PLAN:
	            run_plan_phase()
	        elif phase == Phase.IMPLEMENT:
	            run_implement_phase()
	        elif phase == Phase.REVIEW:
	            run_review_phase()
	You are a planning agent. Your job is to take the context gathered by the previous agent and produce a detailed, actionable implementation plan.

	## Your Goals

	1. **Break down the task** - Create discrete, testable tasks
	2. **Define boundaries** - Each task should have clear file ownership (no conflicts)
	3. **Enable parallelism** - Group tasks that can run simultaneously
	4. **Set acceptance criteria** - Define what "done" means for each task

	## Output Requirements

	You MUST output valid JSON matching this schema:

	```json
	{
	"timestamp": "ISO datetime",
	"plan_version": 1,
	"approach_summary": "High-level description of the implementation approach",
	"risks": [
	"Risk 1: Description and mitigation",
	"Risk 2: Description and mitigation"
	],
	"tasks": [
	{
	"id": "T1",
	"title": "Short task title",
	"description": "Detailed description of what to do",
	"files_to_modify": ["path/to/file.py"],
	"files_to_create": ["path/to/new_file.py"],
	"depends_on": [],
	"acceptance_criteria": [
	"Criterion 1",
	"Criterion 2"
	],
	"estimated_complexity": "trivial|simple|moderate|complex"
	}
	],
	"parallel_groups": [
	["T1", "T2"],
	["T3"],
	["T4", "T5", "T6"]
	]
	}
	```

	## Guidelines

	### Task Boundaries
	- Each task should own specific files — no two parallel tasks should modify the same file
	- If two tasks must modify the same file, they cannot be in the same parallel group

	### Dependencies
	- Use `depends_on` to express ordering requirements
	- Tasks in later parallel groups implicitly depend on all earlier groups

	### Complexity Estimation
	- `trivial`: < 10 lines, obvious change
	- `simple`: 10-50 lines, straightforward logic
	- `moderate`: 50-200 lines, requires some reasoning
	- `complex`: 200+ lines or significant architectural decisions

	### Acceptance Criteria
	- Be specific and testable
	- Include edge cases that should be handled
	- Reference existing tests if applicable

	## What NOT to do

	- Don't implement anything
	- Don't be vague ("improve the code")
	- Don't create tasks without file ownership
	- Don't create overlapping file scopes in the same parallel group
	You are a code review agent. Your job is to review all changes made by the implementation agents and ensure quality, correctness, and adherence to the plan.

	## Your Goals

	1. **Verify correctness** - Check for bugs, edge cases, security issues
	2. **Check plan adherence** - Did the implementation follow the plan?
	3. **Assess quality** - Is the code maintainable and idiomatic?
	4. **Provide actionable feedback** - If issues exist, be specific

	## Output Requirements

	You MUST output valid JSON matching this schema:

	```json
	{
	"timestamp": "ISO datetime",
	"verdict": "approved|needs_changes|rejected",
	"summary": "One paragraph summary of the review",
	"issues": [
	{
	"severity": "critical|major|minor|suggestion",
	"file": "path/to/file.py",
	"line": 42,
	"description": "What's wrong",
	"suggested_fix": "How to fix it",
	"relates_to_task": "T1"
	}
	],
	"plan_drift_detected": false,
	"drift_details": null,
	"recommended_action": "merge|fix_and_re_review|re_plan|escalate_to_human"
	}
	```

	## Severity Definitions

	- `critical`: Security vulnerability, data loss risk, or complete breakage
	- `major`: Significant bug or missing functionality from acceptance criteria
	- `minor`: Small bug or quality issue that should be fixed
	- `suggestion`: Nice-to-have improvement, not blocking

	## Verdict Definitions

	- `approved`: No critical/major issues, ready to merge
	- `needs_changes`: Has issues that must be fixed before merging
	- `rejected`: Fundamental problems require re-planning

	## Recommended Actions

	- `merge`: All good, proceed
	- `fix_and_re_review`: Send back to implementation agents with your feedback
	- `re_plan`: Changes are so off-track that the plan needs revision
	- `escalate_to_human`: Requires human judgment (security, architecture, etc.)

	## What to Check

	### Correctness
	- Does each task meet its acceptance criteria?
	- Are edge cases handled?
	- Are there any obvious bugs?

	### Plan Adherence
	- Did agents stay within their file scopes?
	- Were any unauthorized changes made?
	- Do the changes match the task descriptions?

	### Quality
	- Does the code follow existing patterns from the context?
	- Is the code readable and maintainable?
	- Are there appropriate tests?

	### Security
	- Any hardcoded secrets?
	- SQL injection, XSS, or other vulnerabilities?
	- Proper input validation?

	## Guidelines

	- Be thorough but fair — don't nitpick on style if functionality is correct
	- Always provide a suggested fix for issues you raise
	- Consider the broader context, not just individual changes
	- If parallel agents made conflicting changes, flag this as critical
	"""
	Pydantic schemas defining the contracts between agents.
	Each agent reads and writes structured data — no ambiguity.
	"""

	from pydantic import BaseModel, Field
	from typing import Literal
	from datetime import datetime


	# ─────────────────────────────────────────────────────────────────
	# Phase 1: Context Agent Output
	# ─────────────────────────────────────────────────────────────────

	class FileContext(BaseModel):
	"""One entry of ``ContextOutput.relevant_files``: a file the context agent reported."""
	# Path of the file (the context prompt's example uses a repo-relative path).
	path: str
	# Short description of what the file does.
	summary: str
	# How relevant the file is to the task; restricted to these three levels.
	relevance: Literal["high", "medium", "low"]
	# Number of lines in the file, as reported by the agent.
	line_count: int


	class DependencyInfo(BaseModel):
	    """One entry of ``ContextOutput.dependencies``: a package the code relies on."""

	    # Package name.
	    name: str
	    # Version string when the agent could determine one. The explicit
	    # default makes the field truly optional, matching the other nullable
	    # fields in this module; without it Pydantic v2 treats a bare
	    # ``str | None`` annotation as required.
	    version: str | None = None
	    # What the package is used for.
	    usage: str


	class ContextOutput(BaseModel):
	"""Output from the context-gathering agent."""

	# Defaults to the local time at which the model instance is created.
	# The orchestrator's is_stale() reads the "timestamp" key of the stored
	# context to decide whether context must be gathered again.
	timestamp: datetime = Field(default_factory=datetime.now)
	# The task the context was gathered for.
	task_description: str

	# Discovered context
	relevant_files: list[FileContext]
	dependencies: list[DependencyInfo]
	existing_patterns: list[str] = Field(
	description="Patterns/conventions already in the codebase"
	)
	constraints: list[str] = Field(
	description="Technical constraints discovered (e.g., 'must support Python 3.9+')"
	)

	# Scope boundaries
	# Both lists are required; each holds file paths as plain strings.
	files_in_scope: list[str]
	files_out_of_scope: list[str]


	# ─────────────────────────────────────────────────────────────────
	# Phase 2: Planner Agent Output
	# ─────────────────────────────────────────────────────────────────

	class PlanTask(BaseModel):
	"""One unit of work in a plan (an element of ``PlanOutput.tasks``)."""
	# The orchestrator looks tasks up by this id when it walks the plan's
	# parallel_groups.
	id: str = Field(description="Unique task ID like 'T1', 'T2'")
	title: str
	description: str
	# The implementation prompt tells the agent it may ONLY modify these files.
	files_to_modify: list[str]
	# Files the agent is allowed to create; optional, empty by default.
	files_to_create: list[str] = []
	depends_on: list[str] = Field(
	default=[],
	description="Task IDs this depends on — for ordering parallel work"
	)
	# Conditions that define "done" for this task.
	acceptance_criteria: list[str]
	# Restricted to these four levels.
	estimated_complexity: Literal["trivial", "simple", "moderate", "complex"]


	class PlanOutput(BaseModel):
	"""Output from the planning agent."""

	# Defaults to the local time at which the model instance is created.
	timestamp: datetime = Field(default_factory=datetime.now)
	# Defaults to 1; nothing visible in this module changes it.
	plan_version: int = 1

	# High-level strategy
	approach_summary: str
	risks: list[str]

	# Ordered tasks
	tasks: list[PlanTask]

	# Parallelization hints
	# The orchestrator processes the groups in list order and resolves each
	# task id against ``tasks``.
	parallel_groups: list[list[str]] = Field(
	description="Groups of task IDs that can run in parallel"
	)


	# ─────────────────────────────────────────────────────────────────
	# Phase 3: Implementation Agent Output
	# ─────────────────────────────────────────────────────────────────

	class FileChange(BaseModel):
	"""One file touched by an implementation agent (an element of ``TaskResult.changes``)."""
	# Path of the affected file.
	path: str
	# What happened to the file; restricted to these three values.
	change_type: Literal["created", "modified", "deleted"]
	# Short description of the change.
	summary: str
	# Line counts are optional and default to zero.
	lines_added: int = 0
	lines_removed: int = 0


	class TaskResult(BaseModel):
	"""Result reported for one plan task; the implementation prompt asks agents for this shape."""
	# Id of the task this result belongs to (presumably a PlanTask.id — confirm).
	task_id: str
	# Outcome of the task; restricted to these four values.
	status: Literal["completed", "partial", "blocked", "failed"]
	# File-level changes the agent reports having made.
	changes: list[FileChange]
	# Free-form remarks; empty by default.
	notes: str = ""
	# Reasons the task could not be finished; empty by default.
	blockers: list[str] = []


	class ImplOutput(BaseModel):
	"""Output from an implementation agent."""

	# NOTE(review): the report the orchestrator's run_implement_phase() stores
	# has only "timestamp", "results" and "git_commit_sha" — no "agent_id" —
	# so that file would not validate against this model as written. Confirm
	# which side is meant to change.

	# Defaults to the local time at which the model instance is created.
	timestamp: datetime = Field(default_factory=datetime.now)
	# Required: has a description but no default.
	agent_id: str = Field(description="Which parallel agent ran this")

	results: list[TaskResult]
	# SHA of the checkpoint commit, or None when no commit was recorded.
	git_commit_sha: str \| None = None


	# ─────────────────────────────────────────────────────────────────
	# Phase 4: Review Agent Output
	# ─────────────────────────────────────────────────────────────────

	class ReviewIssue(BaseModel):
	    """A single finding raised by the review agent (an element of ``ReviewOutput.issues``)."""

	    # How serious the finding is; restricted to these four levels.
	    severity: Literal["critical", "major", "minor", "suggestion"]
	    # Path of the file the finding refers to.
	    file: str
	    # Line number within ``file`` when the finding can be pinned to one.
	    line: int | None = None
	    # What is wrong.
	    description: str
	    # How to fix it, when the reviewer offers a fix.
	    suggested_fix: str | None = None
	    # default=None makes this nullable field optional like ``line`` and
	    # ``suggested_fix``; a Field() without a default is required in
	    # Pydantic v2, so an issue not tied to a task failed validation.
	    relates_to_task: str | None = Field(
	        default=None,
	        description="Task ID if this issue relates to plan drift"
	    )


	class ReviewOutput(BaseModel):
	"""Output from the review agent."""

	# Defaults to the local time at which the model instance is created.
	timestamp: datetime = Field(default_factory=datetime.now)

	# Overall assessment
	verdict: Literal["approved", "needs_changes", "rejected"]
	summary: str

	# Detailed issues
	issues: list[ReviewIssue]

	# Plan adherence check
	plan_drift_detected: bool
	# Explanation of the drift; optional, None by default.
	drift_details: str \| None = None

	# What to do next
	# This is the field the orchestrator's determine_next_phase() acts on:
	# "merge" ends the workflow, "fix_and_re_review" re-runs implementation,
	# "re_plan" re-runs planning; any other value makes it return Phase.DONE.
	recommended_action: Literal[
	"merge",
	"fix_and_re_review",
	"re_plan",
	"escalate_to_human"
	]