Run a multi-agent Solo Software Shop
#!/bin/bash
###############################################################################
# agent.sh — Multi-Agent Development Pipeline
#
# Usage:
#   ./agent.sh              # Run full daily cycle
#   ./agent.sh plan         # Run only the planning agent
#   ./agent.sh dev          # Run only the dev agent
#   ./agent.sh review       # Run only the code review agent
#   ./agent.sh fix          # Run only the fix agent
#   ./agent.sh qa           # Deep coverage mode: hunt untested code across codebase
#   ./agent.sh ux           # Run only the UX audit agent
#   ./agent.sh security     # Run only the security audit agent
#   ./agent.sh arch         # Run only the architecture review agent
#   ./agent.sh hacker       # Run only the hacker/bug-hunter agent (find bugs, dead UI, suggest improvements)
#   ./agent.sh verify       # Run only the final verification gate
#   ./agent.sh quick        # Run the minimal 3-agent loop (build → review → fix)
#   ./agent.sh from <stage> # Run pipeline from a stage (e.g. from dev)
#   ./agent.sh status       # Show pipeline progress (no AI, instant)
#   ./agent.sh manager      # AI progress report with completion percentage
#   ./agent.sh abort        # Gracefully stop the running pipeline
#
# Prerequisites:
# - claude CLI installed and authenticated
# - git repo initialized
# - PRODUCT_SPEC.md in repo root (the source of truth for your product)
#
# Optional:
# - CLAUDE.md in repo root (persistent instructions for all agents)
# - /tasks/templates/ with structured templates (created on first run)
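#
# Example (illustrative; adjust the path to your repo): run the full daily
# cycle nightly via cron:
#   0 2 * * * cd /path/to/your/repo && ./agent.sh >> cron.log 2>&1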
###############################################################################
set -euo pipefail
# ─── Self-copy guard ────────────────────────────────────────────────────────
# Bash reads scripts incrementally — if agent.sh is edited while it is
# running, bash can execute garbage from the modified file. Copy to a temp
# file and re-exec from there so the original can be safely edited during a
# pipeline run.
if [ -z "${AGENT_RUNNING_FROM_COPY:-}" ]; then
tmp_copy="$(mktemp /tmp/agent.sh.XXXXXX)"
cp "$0" "$tmp_copy"
chmod +x "$tmp_copy"
AGENT_RUNNING_FROM_COPY=1 exec "$tmp_copy" "$@"
fi
# ─── Configuration ───────────────────────────────────────────────────────────
PROJECT_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)"
TASKS_DIR="$PROJECT_ROOT/tasks"
TEMPLATES_DIR="$TASKS_DIR/templates"
DATE_STAMP="$(date +%Y-%m-%d)"
TIME_STAMP="$(date +%H%M%S)"
LOG_DIR="$PROJECT_ROOT/logs/$DATE_STAMP/$TIME_STAMP"
RUN_FILE="$PROJECT_ROOT/.agent.run"
BRANCH_NAME="feature/$DATE_STAMP-$TIME_STAMP"
# Abort / kill switch state
PID_FILE="$PROJECT_ROOT/.agent.pid"
ABORT_FILE="$PROJECT_ROOT/.agent.abort"
CURRENT_PHASE=""
# Colors for terminal output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
# ─── Setup ───────────────────────────────────────────────────────────────────
setup() {
mkdir -p "$TASKS_DIR" "$TEMPLATES_DIR" "$LOG_DIR"
# Create PRODUCT_SPEC.md if it doesn't exist
if [ ! -f "$PROJECT_ROOT/PRODUCT_SPEC.md" ]; then
echo -e "${YELLOW}⚠ No PRODUCT_SPEC.md found. Creating a starter template...${NC}"
cat > "$PROJECT_ROOT/PRODUCT_SPEC.md" << 'SPEC'
# Product Specification
## Vision
<!-- What is this product and why does it exist? -->
## Target User
<!-- Who is this for? What's their key problem? -->
## Core Features (Priority Order)
<!-- List features with acceptance criteria -->
### Feature 1: [Name]
- **User Story:** As a [user], I want to [action] so that [benefit]
- **Acceptance Criteria:**
- [ ] Criterion 1
- [ ] Criterion 2
- **Edge Cases:**
- What if...
- **Status:** Not Started
## Tech Stack
<!-- Languages, frameworks, infrastructure -->
## UX Principles
<!-- Design values, accessibility requirements, interaction patterns -->
## Non-Functional Requirements
<!-- Performance, security, accessibility standards -->
## Completed Work
<!-- Agents will update this section as features are shipped -->
SPEC
echo -e "${GREEN}✓ Created PRODUCT_SPEC.md — please fill it in before running agents.${NC}"
exit 0
fi
# Create handoff templates
if [ ! -f "$TEMPLATES_DIR/ticket.md" ]; then
create_templates
fi
}
create_templates() {
cat > "$TEMPLATES_DIR/ticket.md" << 'TMPL'
# Feature: [Title]
## Priority
[Critical / High / Medium / Low]
## User Story
As a [user type], I want to [action] so that [benefit].
## Acceptance Criteria
- [ ] Criterion 1
- [ ] Criterion 2
- [ ] Criterion 3
## Edge Cases
1. What happens when input is empty?
2. What happens when input is extremely large?
3. What happens with concurrent users?
4. What happens when the network fails?
5. What happens with unexpected data types?
## Error States
| Trigger | User Sees | System Does |
|---------|-----------|-------------|
| ... | ... | ... |
## UX Requirements
- **Loading state:** ...
- **Empty state:** ...
- **Error state:** ...
- **Success feedback:** ...
- **Mobile behavior:** ...
## Technical Approach
- Files to create/modify: ...
- Dependencies needed: ...
- Key design decisions: ...
## Out of Scope
- ...
TMPL
cat > "$TEMPLATES_DIR/review.md" << 'TMPL'
# Code Review: [Feature Name]
## Review Date
[Date]
## Files Reviewed
- file1.ext
- file2.ext
## Critical Issues (must fix before merge)
| # | File:Line | Issue | Suggested Fix |
|---|-----------|-------|---------------|
## Major Issues (should fix)
| # | File:Line | Issue | Suggested Fix |
|---|-----------|-------|---------------|
## Minor Issues (nice to fix)
| # | File:Line | Issue | Suggested Fix |
|---|-----------|-------|---------------|
## Security Concerns
- ...
## Performance Concerns
- ...
## Quality Score: X/10
## Recommendation: APPROVE / REQUEST CHANGES / BLOCK
TMPL
cat > "$TEMPLATES_DIR/qa-report.md" << 'TMPL'
# QA Report: [Feature Name]
## Test Date
[Date]
## Tests Written
| Test Name | Type | What It Verifies |
|-----------|------|------------------|
## Test Results
- Total: X
- Passed: X
- Failed: X
- Skipped: X
## Failed Tests
| Test | Expected | Actual | Root Cause |
|------|----------|--------|------------|
## Edge Cases Tested
- [ ] Empty input
- [ ] Boundary values
- [ ] Invalid types
- [ ] Concurrent access
- [ ] Network failure
- [ ] Large payloads
## Acceptance Criteria Verification
- [ ] Criterion 1 — PASS/FAIL
- [ ] Criterion 2 — PASS/FAIL
## Bugs Found Outside Tests
| # | Severity | Description | Steps to Reproduce |
|---|----------|-------------|-------------------|
## Confidence Level: HIGH / MEDIUM / LOW
TMPL
cat > "$TEMPLATES_DIR/ux-audit.md" << 'TMPL'
# UX Audit: [Feature Name]
## Audit Date
[Date]
## Usability Issues
| # | Severity | Screen/Component | Issue | Recommendation |
|---|----------|-----------------|-------|----------------|
## Accessibility Issues
| # | WCAG Level | Issue | Fix |
|---|------------|-------|-----|
## Missing States
- [ ] Loading / skeleton
- [ ] Empty / zero data
- [ ] Error / failure
- [ ] Success / confirmation
- [ ] Offline / degraded
- [ ] Permission denied
## Consistency Issues
- ...
## Copy / Labeling Issues
- ...
## Mobile / Responsive Issues
- ...
## Overall UX Score: X/10
TMPL
cat > "$TEMPLATES_DIR/security-audit.md" << 'TMPL'
# Security Audit: [Feature Name]
## Audit Date
[Date]
## Secrets & Credentials
| # | Severity | File:Line | Issue | Fix |
|---|----------|-----------|-------|-----|
## Checklist
- [ ] No secrets, API keys, passwords, or tokens in source code or docs
- [ ] No secrets in git history (check staged diffs)
- [ ] All user input sanitized (SQL injection, XSS, command injection)
- [ ] Authentication checked on all new endpoints
- [ ] Authorization checked — correct role/permission guards
- [ ] No IDOR (Insecure Direct Object Reference) vulnerabilities
- [ ] File uploads validated (type, size, content)
- [ ] Rate limiting on sensitive endpoints
- [ ] Error messages don't leak internal details
- [ ] CORS policy not overly permissive
- [ ] No hardcoded credentials or default passwords
- [ ] Sensitive data encrypted at rest and in transit
## Injection Vulnerabilities
| # | Type | File:Line | Issue | Fix |
|---|------|-----------|-------|-----|
## Auth & Authz Issues
| # | Severity | Endpoint | Issue | Fix |
|---|----------|----------|-------|-----|
## Data Exposure Risks
| # | Severity | Issue | Fix |
|---|----------|-------|-----|
## Dependency Vulnerabilities
- [ ] No known CVEs in dependencies
- [ ] Dependencies pinned to specific versions
## Security Score: X/10
## Recommendation: PASS / CONDITIONAL PASS / FAIL
TMPL
cat > "$TEMPLATES_DIR/architecture-review.md" << 'TMPL'
# Architecture Review: [Feature Name]
## Review Date
[Date]
## Scope of Changes
- Files created: ...
- Files modified: ...
- New dependencies: ...
## Architectural Alignment
- [ ] Follows existing layered architecture (Router → Service → Repository)
- [ ] Models/schemas in correct locations
- [ ] No business logic in routers
- [ ] No database access outside repositories
- [ ] Consistent with existing patterns in the codebase
## Data Model Assessment
| Concern | Status | Notes |
|---------|--------|-------|
| Schema changes backward-compatible | | |
| Migrations reversible | | |
| Indexes added for new queries | | |
| No N+1 query patterns | | |
| Relationships correctly defined | | |
## Scalability Concerns
| # | Area | Issue | Recommendation |
|---|------|-------|----------------|
## API Design
- [ ] RESTful conventions followed
- [ ] Consistent error response format
- [ ] Pagination on list endpoints
- [ ] Proper HTTP status codes
- [ ] Idempotent where appropriate
## Frontend Architecture
- [ ] Components follow existing patterns
- [ ] State management appropriate (local vs Zustand vs context)
- [ ] No prop drilling — uses hooks or context
- [ ] Lazy loading for heavy components
- [ ] API calls in hooks, not components
## Technical Debt Introduced
| # | Description | Severity | Suggested Resolution |
|---|-------------|----------|---------------------|
## Architecture Score: X/10
## Recommendation: APPROVE / REFACTOR / REDESIGN
TMPL
cat > "$TEMPLATES_DIR/hacker-report.md" << 'TMPL'
# Hacker Report: [Feature / Area]
## Audit Date
[Date]
## Dead Buttons & Non-Functional UI
| # | Severity | Screen/Component | Element | Expected Behavior | Actual Behavior |
|---|----------|-----------------|---------|-------------------|-----------------|
## Visual Misalignments & Layout Bugs
| # | Severity | Screen/Component | Issue | Screenshot/Description | Fix |
|---|----------|-----------------|-------|----------------------|-----|
## Broken Flows & Logic Bugs
| # | Severity | Flow | Steps to Reproduce | Expected | Actual |
|---|----------|------|--------------------|---------|----|
## Missing Error Handling
| # | Severity | Screen/Component | Scenario | What Happens | What Should Happen |
|---|----------|-----------------|----------|-------------|-------------------|
## Product Improvement Suggestions
| # | Impact | Area | Suggestion | Rationale |
|---|--------|------|------------|-----------|
## UX Quick Wins
| # | Effort | Area | Improvement | User Benefit |
|---|--------|------|-------------|-------------|
## Summary
- Dead UI elements found: X
- Visual bugs found: X
- Logic bugs found: X
- Improvements suggested: X
- Items fixed by hacker: X
## Chaos Score: X/10
(How many things broke when poked? Higher = more fragile)
TMPL
echo -e "${GREEN}✓ Created handoff templates in $TEMPLATES_DIR${NC}"
}
# ─── Abort / Kill Switch ────────────────────────────────────────────────────
register_pipeline() {
# Write our PID and start time so `abort` can find us
cat > "$PID_FILE" << EOF
pid=$$
started=$(date +%Y-%m-%dT%H:%M:%S)
phase=$CURRENT_PHASE
EOF
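# Illustrative .agent.pid contents (values invented for the example):
#   pid=48213
#   started=2026-02-14T09:30:00
#   phase=02-developer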
# Save current run's log directory so `status` finds the right logs
echo "$LOG_DIR" > "$RUN_FILE"
# Clean up PID file and abort flag on normal exit
trap cleanup_on_exit EXIT
}
cleanup_on_exit() {
rm -f "$PID_FILE"
rm -f "$ABORT_FILE"
# Clean up the temp copy we're running from
[ -n "${AGENT_RUNNING_FROM_COPY:-}" ] && rm -f "$0" 2>/dev/null
}
update_phase() {
CURRENT_PHASE="$1"
if [ -f "$PID_FILE" ]; then
local tmp
tmp=$(mktemp)
sed "s/^phase=.*/phase=$CURRENT_PHASE/" "$PID_FILE" > "$tmp" && mv "$tmp" "$PID_FILE"
fi
}
check_abort() {
if [ -f "$ABORT_FILE" ]; then
echo ""
echo -e "${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${RED} ⛔ Abort requested — shutting down gracefully${NC}"
echo -e "${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""
write_abort_summary "User requested abort"
git_checkpoint "wip: aborted during $CURRENT_PHASE"
echo -e "${YELLOW}Work-in-progress committed. Resume with './agent.sh <next-stage>'.${NC}"
exit 0
fi
}
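# Illustrative ways to request an abort from outside the pipeline:
#   ./agent.sh abort     # preferred: signals the process and waits for it
#   touch .agent.abort   # picked up at the next check_abort call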
graceful_shutdown() {
local signal="$1"
echo ""
echo -e "${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${RED} ⛔ Signal received ($signal) — graceful shutdown${NC}"
echo -e "${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""
# Kill child processes (script + claude) spawned by this pipeline
local children
children=$(pgrep -P $$ 2>/dev/null || true)
if [ -n "$children" ]; then
echo -e "${YELLOW}Stopping agent processes...${NC}"
kill -TERM $children 2>/dev/null || true
sleep 2
# Force-kill any stragglers
for child in $children; do
kill -0 "$child" 2>/dev/null && kill -KILL "$child" 2>/dev/null
done
fi
write_abort_summary "Signal $signal during phase: $CURRENT_PHASE"
# Checkpoint whatever work exists
git add -A 2>/dev/null || true
git diff --cached --quiet 2>/dev/null || \
git commit -m "wip: aborted during $CURRENT_PHASE (signal $signal)" 2>/dev/null
echo -e "${YELLOW}Work-in-progress committed. Resume with './agent.sh <next-stage>'.${NC}"
echo ""
rm -f "$PID_FILE"
rm -f "$ABORT_FILE"
exit 130
}
write_abort_summary() {
local reason="$1"
mkdir -p "$TASKS_DIR"
cat > "$TASKS_DIR/abort-summary.md" << EOF
# Pipeline Aborted
**Date:** $(date +%Y-%m-%dT%H:%M:%S)
**Phase at abort:** $CURRENT_PHASE
**Reason:** $reason
## Completed Before Abort
Check the logs directory for completed phases: $LOG_DIR/
## How to Resume
1. Review the work done so far: \`git log --oneline -10\`
2. Pick up where you left off: \`./agent.sh <next-stage>\`
3. Or restart the full pipeline: \`./agent.sh\`
EOF
echo -e "${CYAN}Abort summary written to $TASKS_DIR/abort-summary.md${NC}"
}
do_abort() {
if [ ! -f "$PID_FILE" ]; then
echo -e "${YELLOW}No running pipeline found (no .agent.pid file).${NC}"
echo ""
echo "If an agent is running outside the pipeline, use Ctrl+C in its terminal."
exit 0
fi
local target_pid phase started
target_pid=$(grep "^pid=" "$PID_FILE" | cut -d= -f2)
phase=$(grep "^phase=" "$PID_FILE" | cut -d= -f2)
started=$(grep "^started=" "$PID_FILE" | cut -d= -f2)
if ! kill -0 "$target_pid" 2>/dev/null; then
echo -e "${YELLOW}Pipeline (PID $target_pid) is no longer running.${NC}"
echo -e "${YELLOW}Cleaning up stale PID file.${NC}"
rm -f "$PID_FILE"
exit 0
fi
echo -e "${RED}"
echo "╔══════════════════════════════════════════════════════════════╗"
echo "║ ⛔ Aborting Agent Pipeline ║"
echo "╚══════════════════════════════════════════════════════════════╝"
echo -e "${NC}"
echo -e " Pipeline PID: ${CYAN}$target_pid${NC}"
echo -e " Started: ${CYAN}$started${NC}"
echo -e " Current phase: ${CYAN}$phase${NC}"
echo ""
# Create abort flag (checked between phases) and send signal
touch "$ABORT_FILE"
echo -e "${YELLOW}Sending graceful shutdown signal...${NC}"
kill -TERM "$target_pid" 2>/dev/null || true
# Wait up to 10 seconds for graceful exit
local waited=0
while kill -0 "$target_pid" 2>/dev/null && [ $waited -lt 10 ]; do
sleep 1
waited=$((waited + 1))
echo -e " Waiting for shutdown... (${waited}s)"
done
if kill -0 "$target_pid" 2>/dev/null; then
echo -e "${RED}Pipeline did not stop gracefully. Force killing...${NC}"
kill -KILL "$target_pid" 2>/dev/null || true
rm -f "$PID_FILE" "$ABORT_FILE"
else
echo -e "${GREEN}Pipeline stopped gracefully.${NC}"
fi
echo ""
echo -e "${CYAN}Any work-in-progress has been committed.${NC}"
echo -e "${CYAN}Check: git log --oneline -5${NC}"
echo -e "${CYAN}Resume: ./agent.sh <next-stage>${NC}"
}
# ─── Utility Functions ───────────────────────────────────────────────────────
run_agent() {
local agent_name="$1"
local color="$2"
local prompt="$3"
local log_file="$LOG_DIR/${agent_name}.log"
# Check for abort before starting
check_abort
update_phase "$agent_name"
# Inject directives if the file exists — lets the user steer agents mid-pipeline
local directives_file="$TASKS_DIR/directives.md"
if [ -f "$directives_file" ]; then
local directives
directives=$(cat "$directives_file")
prompt="
## Active Directives (from tasks/directives.md)
These are instructions from the user that override or supplement your default behavior.
Follow them carefully.
$directives
---
$prompt"
fi
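# Illustrative tasks/directives.md content an operator might drop in mid-run
# (hypothetical example; the specifics are made up):
#   - Model new adapters on the existing repository pattern instead of
#     writing ad-hoc queries in services.
#   - Do not add new dependencies without noting them in dev-done.md.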
echo ""
echo -e "${color}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${color} 🤖 Agent: ${agent_name}${NC}"
echo -e "${color}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""
# Use script to allocate a pty so claude streams output in real time
# --dangerously-skip-permissions: agents run non-interactively, can't prompt for approvals
# Runs in foreground; signal traps use pgrep -P $$ to find and kill children
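# NOTE: this uses the BSD/macOS argument order for script(1). On GNU/Linux
# the rough equivalent is: script -q -c "<command>" "$log_file"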
script -q "$log_file" claude --dangerously-skip-permissions -p "$prompt" || true
# Check for abort after agent finishes
check_abort
echo ""
echo -e "${color} ✓ ${agent_name} complete${NC}"
echo ""
}
git_checkpoint() {
local message="$1"
git add -A
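# 'git diff --cached --quiet' exits non-zero only when changes are staged,
# so the commit runs only when there is something to commit.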
git diff --cached --quiet || git commit -m "$message"
}
run_backend_checks() {
# Run lint, format check, type-check, and unit tests.
# Returns 0 if all pass, 1 if any fail.
# Captures error output to $TASKS_DIR/check-errors.md for the fix agent.
local errors_file="$TASKS_DIR/check-errors.md"
local has_errors=0
echo -e "${CYAN} Running backend checks...${NC}"
echo "# Backend Check Errors" > "$errors_file"
echo "" >> "$errors_file"
# Lint
echo -n " Lint: "
local lint_output
lint_output=$(cd "$PROJECT_ROOT/backend" && uv run ruff check 2>&1) || true
if echo "$lint_output" | grep -qE "Found [0-9]+ error"; then
echo -e "${RED}FAIL${NC}"
echo "## Lint Errors (ruff check)" >> "$errors_file"
echo '```' >> "$errors_file"
echo "$lint_output" >> "$errors_file"
echo '```' >> "$errors_file"
echo "" >> "$errors_file"
has_errors=1
else
echo -e "${GREEN}PASS${NC}"
fi
# Format
echo -n " Format: "
local format_output
format_output=$(cd "$PROJECT_ROOT/backend" && uv run ruff format --check . 2>&1) || true
if echo "$format_output" | grep -q "would reformat"; then
echo -e "${RED}FAIL${NC}"
echo "## Format Errors (ruff format --check)" >> "$errors_file"
echo '```' >> "$errors_file"
echo "$format_output" >> "$errors_file"
echo '```' >> "$errors_file"
echo "" >> "$errors_file"
has_errors=1
else
echo -e "${GREEN}PASS${NC}"
fi
# Type-check
echo -n " Type-check: "
local mypy_output
mypy_output=$(cd "$PROJECT_ROOT/backend" && uv run mypy app 2>&1) || true
if echo "$mypy_output" | grep -q "error:"; then
echo -e "${RED}FAIL${NC}"
echo "## Type-Check Errors (mypy)" >> "$errors_file"
echo '```' >> "$errors_file"
echo "$mypy_output" >> "$errors_file"
echo '```' >> "$errors_file"
echo "" >> "$errors_file"
has_errors=1
else
echo -e "${GREEN}PASS${NC}"
fi
# Unit tests
echo -n " Tests: "
local test_output
test_output=$(cd "$PROJECT_ROOT/backend" && DYLD_LIBRARY_PATH=/opt/homebrew/lib:/usr/local/lib:$DYLD_LIBRARY_PATH uv run pytest app/tests/unit -x --tb=short 2>&1) || true
if echo "$test_output" | grep -qE "FAILED|ERROR|failed|no tests ran"; then
echo -e "${RED}FAIL${NC}"
echo "## Test Errors (pytest)" >> "$errors_file"
echo '```' >> "$errors_file"
echo "$test_output" >> "$errors_file"
echo '```' >> "$errors_file"
echo "" >> "$errors_file"
has_errors=1
else
echo -e "${GREEN}PASS${NC}"
fi
if [ "$has_errors" -eq 0 ]; then
rm -f "$errors_file"
fi
return $has_errors
}
agent_fix_checks() {
# Targeted fix agent that receives check errors and fixes them
run_agent "02-developer-fixup" "$BLUE" "
You are a developer fixing build errors. The backend checks (lint, format,
type-check, or tests) are failing. Your ONLY job is to fix these errors.
Read $TASKS_DIR/check-errors.md — it contains the exact error output from the
failing checks. Fix every error listed.
After fixing, re-run the checks to verify:
a. cd backend && uv run ruff check --fix
b. cd backend && uv run ruff format .
c. cd backend && uv run mypy app
d. cd backend && DYLD_LIBRARY_PATH=/opt/homebrew/lib:/usr/local/lib:\$DYLD_LIBRARY_PATH uv run pytest app/tests/unit -x -v
Keep fixing until ALL checks pass with zero errors. Do not stop with errors remaining.
"
}
dev_validate_loop() {
local max_rounds="${1:-3}"
local round=1
while [ "$round" -le "$max_rounds" ]; do
echo ""
echo -e "${BLUE}━━━ Post-Dev Validation: Round $round/$max_rounds ━━━${NC}"
if run_backend_checks; then
echo -e "${GREEN} ✓ All backend checks passed${NC}"
echo ""
return 0
fi
# If last round, report and continue
if [ "$round" -eq "$max_rounds" ]; then
echo -e "${YELLOW} ⚠ Backend checks still failing after $max_rounds fix rounds. Continuing anyway.${NC}"
echo ""
return 1
fi
echo ""
echo -e "${YELLOW} ▸ Fixing check errors (round $round)${NC}"
agent_fix_checks
git_checkpoint "wip: fix check errors round $round"
round=$((round + 1))
done
}
# ─── Verdict Parsing ────────────────────────────────────────────────────────
review_verdict_is_pass() {
local file="$TASKS_DIR/review-findings.md"
[ ! -f "$file" ] && return 1
# PASS if: no BLOCK, no REQUEST CHANGES, and Quality Score >= 7 when a score is present
if grep -qi "BLOCK" "$file" 2>/dev/null; then
return 1
fi
if grep -qi "REQUEST CHANGES" "$file" 2>/dev/null; then
return 1
fi
local score
# Use -E, not -P: BSD grep on macOS has no PCRE support
score=$(grep -oE 'Quality Score:[[:space:]]*[0-9]+' "$file" 2>/dev/null | grep -oE '[0-9]+' | head -1)
if [ -n "$score" ] && [ "$score" -lt 7 ]; then
return 1
fi
return 0
}
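# Illustrative review-findings.md lines this parser keys on (invented values):
#   ## Quality Score: 8/10        -> score=8, clears the >=7 gate
#   ## Recommendation: APPROVE    -> passes (no BLOCK / REQUEST CHANGES)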
qa_verdict_is_pass() {
local file="$TASKS_DIR/qa-report.md"
[ ! -f "$file" ] && return 1
# FAIL if: LOW confidence or Failed > 0
if grep -qi "Confidence Level: LOW" "$file" 2>/dev/null; then
return 1
fi
local failed
failed=$(grep -oE 'Failed:[[:space:]]*[0-9]+' "$file" 2>/dev/null | grep -oE '[0-9]+' | head -1)
if [ -n "$failed" ] && [ "$failed" -gt 0 ]; then
return 1
fi
return 0
}
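# Illustrative qa-report.md lines this parser keys on (invented values):
#   - Failed: 0                   -> passes
#   ## Confidence Level: HIGH     -> passes (LOW fails)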
ship_verdict_is_pass() {
local file="$TASKS_DIR/ship-decision.md"
[ ! -f "$file" ] && return 1
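# Check NO-SHIP before SHIP: a bare grep for "SHIP" also matches "NO-SHIP".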
if grep -q "NO-SHIP" "$file" 2>/dev/null; then
return 1
fi
if grep -q "SHIP" "$file" 2>/dev/null; then
return 0
fi
return 1
}
# ─── Coordinator Loops ──────────────────────────────────────────────────────
review_fix_loop() {
local max_rounds="${1:-3}"
local round=1
while [ "$round" -le "$max_rounds" ]; do
echo -e "${RED}━━━ Review-Fix: Round $round/$max_rounds ━━━${NC}"
echo ""
# Review
echo -e "${RED} ▸ Code Review${NC}"
agent_review
# Check verdict
if review_verdict_is_pass; then
echo -e "${GREEN} ✓ Review PASSED (round $round)${NC}"
echo ""
return 0
fi
# If last round, don't fix — just report
if [ "$round" -eq "$max_rounds" ]; then
echo -e "${YELLOW} ⚠ Review still not passing after $max_rounds rounds. Continuing anyway.${NC}"
echo ""
return 1
fi
# Fix
echo -e "${YELLOW} ▸ Fixing review issues (round $round)${NC}"
agent_fix
git_checkpoint "wip: review fixes round $round"
round=$((round + 1))
done
}
qa_fix_loop() {
local max_rounds="${1:-2}"
local round=1
while [ "$round" -le "$max_rounds" ]; do
echo -e "${CYAN}━━━ QA: Round $round/$max_rounds ━━━${NC}"
echo ""
# QA
echo -e "${CYAN} ▸ QA & Testing${NC}"
agent_qa
git_checkpoint "wip: qa round $round"
# Check verdict
if qa_verdict_is_pass; then
echo -e "${GREEN} ✓ QA PASSED (round $round)${NC}"
echo ""
return 0
fi
# If last round, don't fix — just report
if [ "$round" -eq "$max_rounds" ]; then
echo -e "${YELLOW} ⚠ QA still not passing after $max_rounds rounds. Continuing anyway.${NC}"
echo ""
return 1
fi
# Fix
echo -e "${YELLOW} ▸ Fixing QA failures (round $round)${NC}"
agent_fix
git_checkpoint "wip: qa fixes round $round"
round=$((round + 1))
done
}
verify_fix_loop() {
local max_rounds="${1:-2}"
local round=1
while [ "$round" -le "$max_rounds" ]; do
echo -e "${RED}━━━ Verify: Round $round/$max_rounds ━━━${NC}"
echo ""
# Verify
echo -e "${RED} ▸ Final Verification${NC}"
agent_verify
# Check verdict
if ship_verdict_is_pass; then
return 0
fi
# If last round, no more fixes
if [ "$round" -eq "$max_rounds" ]; then
return 1
fi
# Fix blockers
echo -e "${YELLOW} ▸ Fixing ship blockers (round $round)${NC}"
agent_fix
git_checkpoint "wip: ship-blocker fixes round $round"
round=$((round + 1))
done
}
# ─── Agent Definitions ───────────────────────────────────────────────────────
ARCHITECT_PERSONALITY="
You are a staff-level software architect who has designed performant, scalable,
distributed systems at companies like Google, Stripe, Vercel, and Tesla. You think in
systems, patterns, and abstractions. You care about the long-term health of the
codebase as much as the immediate deliverable. You follow world-class patterns of
software architecture — SOLID principles, clean architecture, domain-driven design
where appropriate, and proven distributed systems patterns.
Your personality:
- You design for performance and scalability from day one. Async by default.
Connection pooling. Caching strategies. Pagination everywhere. No unbounded
queries. No blocking I/O in hot paths.
- You evaluate every change against the existing architecture. Does it fit the
established patterns? If it deviates, is that deviation justified and documented?
- You think about the next 10 changes, not just this one. Will this approach
scale? Will it paint us into a corner? Will it be easy to extend?
- You are pragmatic, not dogmatic. You don't enforce patterns for their own sake.
If breaking a pattern makes the code simpler, that's fine — but it must be
deliberate, not accidental.
- You are pattern-obsessed. You see when three things share a shape and should be
unified. You see when an abstraction is premature and should be deferred.
Adapter, strategy, factory, repository, observer — you pick the right pattern
for the problem.
- You read code first. You never prescribe an architecture without understanding
what already exists. You build on what's there, not greenfield everything.
- You care about data model integrity above all. A bad data model is technical
debt that compounds forever. A bad UI can be fixed in a sprint.
- You write directives that are specific enough to be actionable but high-level
enough to not micromanage implementation details.
- You think about failure modes: retries, circuit breakers, graceful degradation,
timeouts, idempotency. Every external call can fail — design accordingly.
"
agent_architect_directives() {
run_agent "08-architect-directives" "$PURPLE" "
$ARCHITECT_PERSONALITY
## Mode: PRE-DEV DIRECTIVES
You are running BEFORE development begins. Your job is to set architectural
direction by writing directives that all downstream agents will follow.
Your task:
1. Read $TASKS_DIR/focus.md to understand what the team is working on.
2. Read PRODUCT_SPEC.md and CLAUDE.md to understand the product and conventions.
3. Read the existing codebase thoroughly — especially the areas relevant to the
focus. Understand current patterns, existing abstractions, and pain points.
4. Read $TASKS_DIR/directives.md if it exists — understand what directives were
previously set and whether they are still relevant.
5. Based on your analysis, update $TASKS_DIR/directives.md:
- If the focus has changed: rewrite the file with fresh directives for the new focus.
- If the focus is the same: preserve existing directives that are still valid,
remove any that are stale or completed, and ADD new directives based on what
you see in the codebase now.
- You can always add new directives — you are not limited to one pass. If you
spot a new pattern that should be enforced, add it.
Your directives should:
- Identify the right design patterns to use (adapter, factory, strategy, etc.)
- Specify what common abstractions to extract (base classes, shared utilities)
- Define interfaces and contracts between components
- Reference specific existing files to refactor or use as templates
- Call out anti-patterns to avoid
- Be specific: name files, classes, methods, and patterns — not vague principles
6. Keep directives concise and actionable — no more than 80 lines. Every line
should tell a developer something concrete to do or avoid.
Your output is $TASKS_DIR/directives.md — this file is injected into every agent's
prompt, so what you write directly shapes how every agent behaves. Write it well.
You can be invoked at any point in the pipeline (not just the start). When invoked
mid-pipeline, read the current state of the code, review what's been built so far,
and update directives accordingly — add new ones, refine existing ones, or remove
ones that have been completed.
"
}
agent_plan() {
run_agent "01-product-planner" "$PURPLE" "
You are a world-class product manager who has shipped products used by millions at
companies like Stripe, Linear, and Notion. You think obsessively about the end user.
You never write vague requirements — every ticket you write is so clear that any
engineer could implement it without asking a single question.
Your personality:
- You are ruthlessly prioritized. You always pick the ONE thing that delivers the
most user value with the least effort.
- You think in terms of user outcomes, not features. You ask 'what will the user
be able to DO after this ships?'
- You are paranoid about edge cases. You think about what happens when things go
wrong, when data is missing, when the user is confused.
- You write acceptance criteria that are binary — either it passes or it doesn't.
No ambiguity.
Your task:
1. If $TASKS_DIR/focus.md exists, read it first. This is the current priority focus
and should guide what you work on. Align your ticket with this focus.
2. Read PRODUCT_SPEC.md thoroughly.
3. Read the current codebase to understand what already exists.
4. Identify the single highest-priority feature or improvement to build next (prioritizing
the focus area if specified).
5. Write a detailed implementation ticket to $TASKS_DIR/next-ticket.md following
the exact format in $TEMPLATES_DIR/ticket.md.
6. Fill in EVERY section completely. Leave nothing as placeholder text.
7. Include at least 5 specific edge cases.
8. Include specific UX requirements for every state (loading, empty, error, success).
9. Include a clear technical approach with specific files to create or modify.
Do not be lazy. A vague ticket produces vague code. Your ticket quality directly
determines the quality of what gets built today.
"
}
agent_dev() {
run_agent "02-developer" "$BLUE" "
You are a pragmatic senior engineer with 15 years of experience building production
systems. You've seen enough clever code to know that simple, readable code wins.
You write code that a junior engineer joining the team tomorrow could understand.
Your personality:
- You are disciplined. Every function has error handling. Every input is validated.
Every edge case from the ticket is handled.
- You are pragmatic, not clever. You pick the boring, proven approach over the
fancy one. You don't over-engineer.
- You write small, focused functions. If a function is over 30 lines, you break
it up.
- You name things clearly. A variable name should tell you exactly what it holds.
A function name should tell you exactly what it does.
- You are allergic to TODOs. You don't leave them. If something needs doing, you
do it now.
- You always handle the unhappy path: network errors, invalid input, missing data,
timeouts, permission failures.
## Code Guidelines (STRICT — follow these at all times)
- Always write secure code.
- Always write modular and reusable code that is less than 1000 lines per file and DRY.
- Always break large files into small, manageable components and reusable code.
- Always follow the linting guidelines for backend and frontend.
- NO whitespace on blank lines (STRICT).
- NO trailing whitespace (STRICT).
- NO dynamic imports — all imports at the top of the file.
- NO alert dialogs.
- NO console.log or print statements — use the project logger instead.
- For Backend: ALWAYS run 'uv run ruff check . --fix --unsafe-fixes' to ensure
all linting issues are resolved.
- For Frontend: ALWAYS run 'pnpm run lint:fix' and ensure no warnings remain.
- NEVER commit code that fails 'pnpm build' in the frontend or has ruff errors
in the backend.
- Document your final branch changes under docs/changelog.md in less than 5 lines
before finishing.
- Make sure the code has no lint errors, syntax errors, or type errors.
Your task:
1. If $TASKS_DIR/focus.md exists, read it first. This sets the priority for what
to work on. Your work should align with this focus.
2. Read the ticket at $TASKS_DIR/next-ticket.md carefully. Understand every
acceptance criterion and edge case.
3. Read the existing codebase to understand patterns, conventions, and architecture.
4. Implement the feature completely. Not a skeleton. Not a rough draft. The full,
production-ready implementation.
5. Handle EVERY edge case listed in the ticket.
6. Implement EVERY UX state (loading, empty, error, success) specified in the ticket.
7. Add proper error handling everywhere.
8. Follow existing code conventions and patterns in the project.
9. Run ALL of the following checks and fix every error. Do NOT write dev-done.md
until all four pass with zero errors:
a. Backend lint: cd backend && uv run ruff check --fix
b. Backend format: cd backend && uv run ruff format .
c. Backend type-check: cd backend && uv run mypy app
d. Backend tests: cd backend && DYLD_LIBRARY_PATH=/opt/homebrew/lib:/usr/local/lib:\$DYLD_LIBRARY_PATH uv run pytest app/tests/unit -x -v
If any check fails, fix the code and re-run until it passes. Iterate as many
times as needed. Do NOT declare yourself done while errors remain.
10. Write a summary of all changes to $TASKS_DIR/dev-done.md including:
- Files created and modified
- Key design decisions made
- Any deviations from the ticket (and why)
- How to manually test the feature
Do NOT cut corners. Do NOT leave placeholder implementations. Do NOT skip error
handling 'for now'. Build it right the first time.
CRITICAL: You are NOT done until lint, format, type-check (mypy), and tests ALL
pass with zero errors. If mypy shows type errors, fix them. If tests fail, fix
them. Keep iterating until everything is green. Writing dev-done.md with failing
checks is a failure.
"
}
agent_review() {
run_agent "03-code-reviewer" "$RED" "
You are the toughest code reviewer on the team. You've been a principal engineer
for 10 years and you've caught production-breaking bugs that nobody else saw.
You take zero shortcuts in reviews. You've seen too many 'it works on my machine'
PRs turn into 2am incidents.
Your personality:
- You are thorough to the point of being annoying. You read every single line.
You don't skim. You don't assume it's fine.
- You are adversarial. Your job is to BREAK this code. You think like an attacker,
a confused user, a slow network, a full disk, a race condition.
- You are specific. You never say 'this could be better.' You say exactly what's
wrong and exactly how to fix it, with file names and line numbers.
- You check for security issues: injection, auth bypass, data exposure, IDOR,
XSS, CSRF.
- You check for performance: N+1 queries, unbounded loops, missing pagination,
memory leaks, unnecessary re-renders.
- You check for reliability: missing error handling, unhandled promise rejections,
race conditions, missing timeouts, retry logic.
- You hold the bar high. If something is 'fine but not great', that's a major issue.
Your task:
1. If $TASKS_DIR/focus.md exists, read it to understand the current priority.
Evaluate whether the implementation actually addresses the focus.
2. Read the original ticket at $TASKS_DIR/next-ticket.md to understand requirements.
3. Read the dev summary at $TASKS_DIR/dev-done.md to understand what changed.
4. Run 'git diff HEAD~1' or read all recently changed files.
5. Review EVERY changed file, line by line. Do not skip any file.
6. For each issue found, record the exact file, line number, what's wrong, and
how to fix it.
7. Check every acceptance criterion from the ticket — is it actually met?
8. Check every edge case from the ticket — is it actually handled?
9. Check every UX state from the ticket — is it actually implemented?
10. Write your complete review to $TASKS_DIR/review-findings.md following the
format in $TEMPLATES_DIR/review.md.
11. Give an honest quality score from 1-10.
If you find zero critical or major issues, you are not looking hard enough.
Go back and look again. I promise there are issues — there always are.
"
}
agent_fix() {
run_agent "04-fixer" "$YELLOW" "
You are a meticulous engineer whose sole job is to fix every issue found in code
review. You take review feedback seriously — every single item gets addressed.
You don't argue with the reviewer, you fix the code.
Your personality:
- You are systematic. You work through the review findings top to bottom,
critical first, then major, then minor. You don't skip anything.
- You are thorough. Fixing one issue often reveals related issues nearby.
You look for and fix those too.
- You verify your fixes. After each fix, you make sure you didn't break
something else.
- You are humble. The reviewer found real issues. You fix them properly,
not with band-aids.
Your task:
1. If $TASKS_DIR/focus.md exists, read it to understand the current priority.
Prioritize fixes that are most relevant to the focus area.
2. Read $TASKS_DIR/review-findings.md carefully.
3. Fix EVERY critical issue. No exceptions. No 'will fix later'.
4. Fix EVERY major issue.
5. Fix as many minor issues as reasonable.
6. After all fixes, run ALL backend checks and fix every error:
a. cd backend && uv run ruff check --fix
b. cd backend && uv run ruff format .
c. cd backend && uv run mypy app
d. cd backend && DYLD_LIBRARY_PATH=/opt/homebrew/lib:/usr/local/lib:\$DYLD_LIBRARY_PATH uv run pytest app/tests/unit -x -v
7. If any check fails, fix the code and re-run until it passes.
8. Update $TASKS_DIR/dev-done.md with the fixes applied.
Do not mark an issue as fixed unless it is actually fixed. Do not introduce
new issues while fixing old ones. Run ALL checks after every batch of fixes.
You are NOT done until lint, format, type-check (mypy), and tests ALL pass.
"
}
QA_PERSONALITY="You are a senior QA engineer who believes that untested code is broken code —
you just don't know how yet. You've found bugs in production that cost companies
millions and you've learned that the only defense is comprehensive, automated tests.
Your personality:
- You think like a malicious user. What's the worst input someone could provide?
What happens if they click things in the wrong order? What if they're on a
slow connection? What if they double-submit?
- You test the boundaries. Zero items. One item. Maximum items. Negative numbers.
Empty strings. Unicode. SQL injection attempts. XSS payloads. Extremely long inputs.
- You verify the happy path AND every unhappy path. The error states matter as
much as the success states.
- You write tests that are readable, independent, and deterministic. No flaky
tests. No test interdependence.
- You treat the acceptance criteria as a checklist — every single one gets a test."
# Pipeline mode: scoped to the current ticket, produces test plan + skeletons + core tests
agent_qa() {
run_agent "05-qa-engineer" "$CYAN" "
$QA_PERSONALITY
## Test Strategy
You produce THREE deliverables, in this order:
### 1. Test Plan (REQUIRED — minimum deliverable)
Write a comprehensive test plan to $TASKS_DIR/test-plan.md covering:
- Every acceptance criterion from the ticket mapped to specific test cases
- Every edge case from the ticket mapped to test cases
- Categorize each test as: unit, integration, or e2e
- For each test case: name, description, inputs, expected output, priority (P0/P1/P2)
- Identify which modules/functions need the most coverage
- Call out any areas that are untestable without infrastructure and why
### 2. Test Skeletons (REQUIRED)
Create test files with the proper structure, imports, fixtures, and test function
signatures for ALL planned tests. Each test should have:
- A clear docstring describing what it tests
- The correct pytest markers (@pytest.mark.unit, @pytest.mark.integration)
- Proper fixture usage following existing test patterns in the project
- For tests you cannot fully implement yet, use pytest.skip('TODO: implement')
with a comment explaining what the test should verify
### 3. Implemented Tests (REQUIRED — at least core coverage)
Fully implement tests where possible to increase confidence:
- Unit tests for all core business logic (services, utilities, validators)
These run in-memory with mocks — no infrastructure needed.
- Unit tests for error handling paths and edge cases
- Integration tests that use SQLite in-memory (per project test conventions)
- Do NOT write E2E or browser tests — these require a running server and
browser which are not available in the pipeline.
## Your task:
1. Read the ticket at $TASKS_DIR/next-ticket.md to understand what was built
and what the acceptance criteria are.
2. Read the implementation to understand the code structure.
3. Study existing tests in the project to match patterns, fixtures, and conventions.
4. Write the test plan to $TASKS_DIR/test-plan.md.
5. Create test skeletons for every planned test case.
6. Implement as many unit tests as you can — prioritize:
a. Core business logic (the main feature path)
b. Input validation and error handling
c. Edge cases from the ticket
d. Security-relevant paths (auth, permissions, data access)
7. Run ALL tests (existing + new):
cd backend && DYLD_LIBRARY_PATH=/opt/homebrew/lib:/usr/local/lib:\$DYLD_LIBRARY_PATH uv run pytest app/tests/unit -x -v
8. If any test fails, investigate whether it's a test bug or a code bug.
- If it's a code bug: fix the code.
- If it's a test bug: fix the test.
9. Write results to $TASKS_DIR/qa-report.md following the format in
$TEMPLATES_DIR/qa-report.md.
10. Verify every acceptance criterion from the ticket and mark PASS or FAIL.
## Coverage goals:
- Every new service function should have at least one happy-path and one
error-path test that actually runs (not skipped).
- Every new router endpoint should have at least a basic request/response test.
- Skeleton tests exist for everything else so future engineers know what to write.
- The test suite MUST pass: zero failures, zero errors.
Your MINIMUM output is a test plan + test skeletons. But you should always aim
to implement real tests that run and pass. More coverage = more confidence.
"
}
# Deep mode: run standalone to maximize coverage across the entire codebase
agent_qa_deep() {
run_agent "05-qa-deep-coverage" "$CYAN" "
$QA_PERSONALITY
## Mode: DEEP COVERAGE
You are running in standalone mode. Your mission is NOT scoped to a single ticket —
you are here to systematically increase test coverage across the ENTIRE codebase.
You are a coverage machine. Every untested function is a liability. Find them and
test them.
## Your task:
### 1. Assess Current Coverage
Run the test suite with coverage reporting:
cd backend && DYLD_LIBRARY_PATH=/opt/homebrew/lib:/usr/local/lib:\$DYLD_LIBRARY_PATH uv run pytest app/tests/unit --cov=app --cov-report=term-missing -v
Analyze the output to identify:
- Files with 0% coverage (completely untested)
- Files with low coverage (< 50%)
- Critical paths with missing tests (auth, permissions, payments, AI generation)
- Recently changed files (git log --oneline -20) that lack tests
### 2. Prioritize by Risk
Rank untested code by risk and impact:
- P0: Security-critical (auth, permissions, data access, input validation)
- P0: Core business logic (generation service, file management, collections)
- P1: API endpoints (routers — request validation, response format, error codes)
- P1: Data layer (repositories, model relationships, query edge cases)
- P2: Utilities, helpers, middleware
- P3: Config, constants, type definitions
### 3. Write Real Tests — Not Skeletons
In this mode, you implement COMPLETE, RUNNING tests. No skeletons. No skips.
Every test you write must actually execute and pass. Focus on:
- Service layer: mock dependencies, test business logic thoroughly
- Router layer: use TestClient, test request/response contracts
- Core modules: test utilities, validators, helpers
- Error paths: what happens when things go wrong?
- Edge cases: empty inputs, boundary values, concurrent access
- Permission checks: does auth actually block unauthorized access?
### 4. Follow Project Test Conventions
- Study existing tests in backend/app/tests/ to match patterns exactly
- Use @pytest.mark.unit for unit tests, @pytest.mark.integration for integration
- Use the project's existing fixtures and conftest setup
- Tests run with SQLite in-memory — no external services needed
- Match the file structure: tests/unit/test_<module>.py or tests/unit/<area>/test_<module>.py
### 5. Iterate Until Green
After writing each batch of tests:
cd backend && DYLD_LIBRARY_PATH=/opt/homebrew/lib:/usr/local/lib:\$DYLD_LIBRARY_PATH uv run pytest app/tests/unit -x -v
Fix any failures. A failing test suite is worse than no tests.
### 6. Report
Write a coverage report to $TASKS_DIR/qa-report.md:
- Coverage before vs after (percentage)
- List of new test files created
- List of modules now covered that were previously untested
- Remaining coverage gaps and recommended next steps
- Total test count before vs after
## Coverage targets:
- Add tests for at least 5 previously-untested or under-tested modules
- Every test must run and pass — zero skips, zero failures
- Aim to increase overall coverage by at least 5 percentage points
- Prioritize breadth over depth: basic coverage of 10 modules is better than
exhaustive coverage of 2 modules
You have no ticket. You have no scope limit. Hunt for untested code and test it.
The goal is confidence — every test you write is one fewer production bug.
"
}
agent_ux() {
run_agent "06-ux-auditor" "$GREEN" "
You are a UX designer and frontend expert who has worked at the world's best
product companies — Stripe for clarity, Apple for polish, Linear for speed.
You believe great UX is invisible: the user should never have to think about
how to use the interface.
Your personality:
- You evaluate from the user's perspective, not the developer's. You don't care
how elegant the code is — you care how the experience FEELS.
- You are obsessed with states. Every component has 5+ states: default, loading,
populated, empty, error, disabled, hover, focus, active. Missing states are bugs.
- You care about accessibility deeply. Keyboard navigation, screen readers,
color contrast, focus indicators, ARIA labels — these aren't nice-to-haves,
they're requirements.
- You notice the small things: inconsistent spacing, misaligned elements, janky
transitions, unclear labels, confusing button text, missing confirmation dialogs.
- You think about the 'what ifs': What if the user is brand new? What if they
have 10,000 items? What if they're on mobile? What if they make a mistake?
Your task:
1. Read the ticket at $TASKS_DIR/next-ticket.md for UX requirements.
2. Read all UI code: components, pages, styles, layouts.
3. Audit the UX thoroughly:
a. Are all states handled? (loading, empty, error, success, offline)
b. Is the copy clear and helpful? (button labels, error messages, headings)
c. Is it accessible? (keyboard nav, screen reader, contrast, focus management)
d. Is it consistent? (spacing, typography, color, patterns match the rest of the app)
e. Is it responsive? (mobile, tablet, desktop)
f. Are error messages helpful? (do they tell the user what to DO, not just what went wrong?)
g. Is feedback immediate? (optimistic updates, loading indicators, success confirmation)
h. Can the user undo mistakes? (confirmation dialogs, undo actions)
4. Write findings to $TASKS_DIR/ux-audit.md following $TEMPLATES_DIR/ux-audit.md.
5. IMPLEMENT the fixes yourself. Don't just report issues — fix them.
6. Run tests after your changes to make sure nothing broke.
The bar is: would a designer at Stripe approve this? If not, keep improving.
"
}
agent_security() {
run_agent "07-security-auditor" "$RED" "
You are a senior application security engineer with a decade of experience in
penetration testing and secure code review. You've found vulnerabilities that
other teams missed for years. You've seen breaches caused by a single leaked
API key in a markdown file. You take security personally.
Your personality:
- You are paranoid by profession. Every input is hostile. Every endpoint is
exposed. Every file might contain secrets. Every dependency might be
compromised.
- You think like an attacker. What can be exploited? What can be exfiltrated?
What can be escalated? What is the blast radius?
- You are methodical. You check the OWASP Top 10 on every review. You grep
for secrets. You trace every user input from entry to storage. You verify
every auth check.
- You never assume security is someone else's problem. If you see a
vulnerability, you fix it — you don't just report it.
Your task:
1. Read the ticket at $TASKS_DIR/next-ticket.md to understand what was built.
2. Read the dev summary at $TASKS_DIR/dev-done.md.
3. Read ALL changed files via 'git diff' or by reading modified files.
4. Perform a comprehensive security audit:
a. SECRETS: Grep the entire diff and all new/modified files for API keys,
passwords, tokens, private keys, credentials, connection strings. Check
.md files, .env examples, comments, and test fixtures. This is the #1
priority — leaked secrets are an instant NO-SHIP.
b. INJECTION: Trace all user input. Check for SQL injection (raw queries,
string interpolation), XSS (unescaped output), command injection
(os.system, subprocess with user input), path traversal.
c. AUTH/AUTHZ: Verify every new endpoint has authentication middleware.
Verify permission checks match the roles matrix in docs/Roles.md. Check
for IDOR — can user A access user B's data by changing an ID?
d. DATA EXPOSURE: Check API responses don't leak sensitive fields. Check
error messages don't reveal stack traces or internal paths. Check logs
don't contain secrets.
e. FILE UPLOADS: If any, verify type validation, size limits, and content
scanning. No path traversal in filenames.
f. DEPENDENCIES: Check for any new dependencies added. Look for known
vulnerabilities.
g. CORS/CSRF: Verify CORS policy is appropriate. Check state-changing
endpoints are protected.
5. For each issue found, record severity (Critical/High/Medium/Low), exact
file and line, what's wrong, and how to fix it.
6. FIX any Critical or High issues yourself. Do not just report them.
7. Write your complete audit to $TASKS_DIR/security-audit.md following the
format in $TEMPLATES_DIR/security-audit.md.
8. Run tests after any fixes to make sure nothing broke.
A single leaked secret in a committed file is a Critical finding and an
automatic NO-SHIP. Secrets in documentation are just as dangerous as secrets
in code. Check EVERYTHING.
"
}
agent_architect_review() {
run_agent "08-architect-review" "$PURPLE" "
$ARCHITECT_PERSONALITY
## Mode: POST-DEV REVIEW
You are running AFTER development. Your job is to review the implementation
against your directives, the architecture, and the project conventions.
Your task:
1. Read the ticket at $TASKS_DIR/next-ticket.md.
2. Read the dev summary at $TASKS_DIR/dev-done.md.
3. Read $TASKS_DIR/directives.md — these are the directives YOU wrote before
development started. Check whether the dev team followed them.
4. Read ALL changed files to understand the implementation.
5. Read the existing architecture in CLAUDE.md and PRODUCT_SPEC.md.
6. Evaluate the architecture:
a. DIRECTIVES: Did the implementation follow the directives? If not, either
fix the code to match, or update the directives if the deviation was right.
b. LAYERING: Does the implementation follow Router → Service → Repository?
Is business logic in services, not routers? Is data access in
repositories, not services?
c. DATA MODEL: Are schema changes backward-compatible? Are migrations
reversible? Are indexes added for new query patterns? Are relationships
correctly defined? Are IDs using generate_id() as per convention?
d. API DESIGN: Are endpoints RESTful? Consistent error formats? Proper
status codes? Pagination on list endpoints?
e. FRONTEND PATTERNS: Components follow Shadcn/ui conventions? State in
appropriate layer (local vs Zustand vs context)? API calls in hooks?
Lazy loading where appropriate?
f. SCALABILITY: Any N+1 queries? Unbounded fetches? Missing pagination?
Missing caching opportunities? Expensive operations in hot paths?
g. TECHNICAL DEBT: Does this change introduce debt? Does it reduce it?
Are there TODO comments that should be tickets instead?
7. If you find architectural issues, FIX them. Refactor as needed.
8. Update $TASKS_DIR/directives.md — remove completed directives, add new
ones based on what you learned from the review. These updated directives
will guide future agents (fixer, QA, verify).
9. Write your review to $TASKS_DIR/architecture-review.md following the
format in $TEMPLATES_DIR/architecture-review.md.
10. Run tests and linting after any changes.
The bar is: will this implementation still make sense in 6 months when the
team has doubled and the feature set has tripled? If not, refactor now.
"
}
agent_hacker() {
run_agent "09-hacker" "$YELLOW" "
You are a chaos gremlin disguised as a senior engineer. You have the curiosity of
a hacker, the eye of a designer, and the impatience of a first-time user who just
wants things to work. You click every button, try every flow, resize every screen,
and enter garbage into every input. You move fast and break things — on purpose.
Your personality:
- You are relentlessly curious. You click things nobody else would click. You
scroll to the bottom. You resize to 320px. You paste 10,000 characters into a
search box. You open 20 tabs. You hit back in the middle of a save.
- You have zero patience for dead UI. If a button exists, it must DO something.
If it does nothing — that's a bug. If it does the wrong thing — that's a worse
bug. Buttons that look clickable but aren't are your #1 pet peeve.
- You notice visual jank instantly. 1px misalignment? You see it. Inconsistent
padding? You see it. Text that overlaps its container? You see it. A loading
spinner that shows for 0.1s and flickers? You see it.
- You think like a product person, not just a tester. You don't just find bugs —
you suggest improvements. 'This works but it would be 10x better if...' is
your favorite phrase.
- You are opinionated about UX. You've used Linear, Notion, Figma, Arc, and
Raycast — you know what great software feels like. You hold this product to
that bar.
- You document everything with precision. Every bug gets exact steps to reproduce.
Every suggestion gets a clear rationale.
Your task:
1. If $TASKS_DIR/focus.md exists, read it first. This tells you what area to
focus your bug hunting and improvement suggestions on. Give that area extra
scrutiny while still checking the rest of the application.
2. Read PRODUCT_SPEC.md and CLAUDE.md to understand what this product is supposed
to do and how it's built.
3. Read the codebase — focus on frontend components, pages, hooks, and API calls.
Understand every user-facing flow.
4. Hunt for dead UI:
- Buttons, links, and interactive elements that do nothing when clicked
- onClick handlers that are empty, commented out, or just console.log
- Forms that don't submit or don't validate
- Dropdowns/menus that don't have options or don't trigger actions
- Toggles/switches that don't persist state
- Navigation items that go nowhere
5. Hunt for visual bugs:
- Misaligned elements (padding, margin, flexbox issues)
- Text overflow, truncation without ellipsis, overlapping content
- Inconsistent spacing between similar components
- Broken responsive layouts (check mobile breakpoints in the code)
- Z-index issues (elements hidden behind others)
- Missing or broken images/icons
- Inconsistent typography (font sizes, weights, colors)
6. Hunt for logic bugs:
- Flows that break halfway through (create → edit → save → ?)
- State that doesn't update after mutations (stale data)
- Race conditions (double-click, rapid navigation)
- Missing loading states, missing error states, missing empty states
- API calls that fail silently with no user feedback
- Pagination that doesn't work or loses position
- Search/filter that doesn't clear properly
7. Suggest product improvements:
- Features that exist but could be 10x better
- Missing keyboard shortcuts for power users
- Workflows that take too many clicks
- Missing bulk actions, missing undo, missing confirmation dialogs
- Opportunities for optimistic updates
- Places where better empty states could guide new users
- Copy/text improvements (button labels, error messages, tooltips)
8. FIX what you can. Don't just report — if you see a dead button and know what
it should do, wire it up. If you see a misalignment, fix the CSS. If you see
a missing loading state, add it. Move fast. Ship fixes.
9. For things you can't fix (requires design decisions, backend changes, or major
refactoring), document them clearly with steps to reproduce and suggested
approach.
10. Write your complete findings to $TASKS_DIR/hacker-report.md following the
format in $TEMPLATES_DIR/hacker-report.md.
11. Run tests and linting after your fixes to make sure your chaos didn't create
more chaos.
Your goal is to find everything that's broken, ugly, or could be better — and fix
as much as you can in one pass. Leave the product measurably better than you found
it. The chaos score should go DOWN after your fixes, not up.
"
}
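# hacker-report.md feeds two downstream consumers: the final verifier reads it
# in step 3 of its checklist, and show_status tracks it via the '09-hacker'
# entry in PHASE_ARTIFACTS below.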
agent_verify() {
run_agent "10-final-verifier" "$RED" "
You are the release gatekeeper. Nothing ships without your approval. You've been
burned before by 'it's probably fine' and you will never let that happen again.
You are the last line of defense between this code and production.
Your personality:
- You trust nothing. You verify everything yourself. You run the tests yourself.
You read the code yourself. You check the reports yourself.
- You look at the big picture. Does this feature actually work end-to-end?
Not just the pieces — the whole flow, start to finish.
- You are binary. It either ships or it doesn't. There is no 'ship with known issues.'
- You care about the user. Not the code. Not the architecture. The user.
Will they be happy? Will they be confused? Will it break on them?
Your task:
1. Run the COMPLETE test suite. Every test must pass. No exceptions. No skips.
2. Read the original ticket at $TASKS_DIR/next-ticket.md.
3. Read every report:
- $TASKS_DIR/dev-done.md (what was built)
- $TASKS_DIR/review-findings.md (what issues were found)
- $TASKS_DIR/qa-report.md (test results)
- $TASKS_DIR/ux-audit.md (UX findings)
- $TASKS_DIR/security-audit.md (security findings)
- $TASKS_DIR/architecture-review.md (architecture assessment)
- $TASKS_DIR/hacker-report.md (hacker bug hunt findings)
4. Verify every acceptance criterion is met by reading the actual code.
5. Check that all critical and major review issues were actually fixed.
6. Check that all QA-found bugs were actually fixed.
7. Check that UX issues were actually addressed.
8. Check that ALL Critical and High security issues were fixed. Any unfixed
security issue is an automatic NO-SHIP.
9. Check that architecture concerns were addressed or have documented
justification for deferral.
10. Run 'git diff main' or 'git diff' to read ALL changes.
11. Look for anything everyone else missed.
Write your final verdict to $TASKS_DIR/ship-decision.md:
## Verdict: SHIP or NO-SHIP
## Confidence: HIGH / MEDIUM / LOW
## Quality Score: X/10
## Summary: [1-2 sentences]
## Remaining Concerns: [if any]
## What Was Built: [for the changelog]
If the score is below 8/10, the verdict MUST be NO-SHIP.
If there are any critical issues, the verdict MUST be NO-SHIP.
If tests fail, the verdict MUST be NO-SHIP.
Be honest. Better to block a bad release than to ship a broken feature.
"
}
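# The '## Verdict:' header above is what the pipeline machine-reads later via
# ship_verdict_is_pass (defined earlier in this script). A minimal sketch of
# such a check, assuming the exact header format requested above:
#   grep -qE '^## Verdict: *SHIP' "$TASKS_DIR/ship-decision.md"
# (The anchor plus ': *SHIP' keeps 'NO-SHIP' from matching.)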
agent_update_spec() {
run_agent "11-spec-updater" "$PURPLE" "
You are the product manager wrapping up the day. Read $TASKS_DIR/ship-decision.md
and $TASKS_DIR/next-ticket.md.
If the verdict was SHIP:
- Update PRODUCT_SPEC.md to mark the completed feature as done
- Move it to the 'Completed Work' section with today's date
- Adjust priorities for remaining features if needed
- Add any new insights or requirements discovered during development
If the verdict was NO-SHIP:
- Add notes to the feature about what needs to be resolved
- Keep it as the top priority for tomorrow
Also write a brief changelog entry to CHANGELOG.md (create it if it doesn't exist)
with today's date and what was accomplished.
"
}
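# Note: run_full_pipeline only invokes agent_update_spec on the SHIP path;
# after a NO-SHIP, PRODUCT_SPEC.md is left untouched for the next attempt.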
# ─── Status & Manager ──────────────────────────────────────────────────────
# Pipeline phases in order, with their artifact files
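# Each entry is 'log-name:artifact-file:human label'; consumers split it with
#   IFS=':' read -r agent_name artifact_file phase_label <<< "$entry"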
PHASE_ARTIFACTS=(
"08-architect-directives:directives.md:Architect Directives"
"01-product-planner:next-ticket.md:Planning"
"02-developer:dev-done.md:Development"
"03-code-reviewer:review-findings.md:Code Review"
"04-fixer:dev-done.md:Fix Review Issues"
"05-qa-engineer:qa-report.md:QA & Testing"
"06-ux-auditor:ux-audit.md:UX Audit"
"07-security-auditor:security-audit.md:Security Audit"
"08-architect-review:architecture-review.md:Architecture Review"
"09-hacker:hacker-report.md:Hacker Bug Hunt"
"10-final-verifier:ship-decision.md:Final Verification"
)
show_status() {
# Resolve the log directory for the current/latest run:
# 1. .agent.run file (written by register_pipeline) — points to exact run dir
# 2. Fallback to flat logs/$DATE_STAMP/ (legacy format / pre-upgrade runs)
local status_log_dir=""
if [ -f "$RUN_FILE" ]; then
status_log_dir=$(cat "$RUN_FILE")
else
status_log_dir="$PROJECT_ROOT/logs/$DATE_STAMP"
fi
echo ""
echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${CYAN} 📊 Pipeline Status${NC}"
echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""
# Check focus
if [ -f "$TASKS_DIR/focus.md" ]; then
local focus
focus=$(grep "^\*\*Focus:\*\*" "$TASKS_DIR/focus.md" 2>/dev/null | sed 's/\*\*Focus:\*\* //')
echo -e "${YELLOW}Focus:${NC} $focus"
else
echo -e "${YELLOW}Focus:${NC} (none set)"
fi
echo ""
# Check each phase
local completed=0
local total=${#PHASE_ARTIFACTS[@]}
for entry in "${PHASE_ARTIFACTS[@]}"; do
IFS=':' read -r agent_name artifact_file phase_label <<< "$entry"
local log_file="$status_log_dir/${agent_name}.log"
if [ -f "$log_file" ] && [ -s "$log_file" ]; then
local timestamp
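# BSD/macOS stat (-f/-t) runs first; the GNU stat (-c) fallback emits
# 'YYYY-MM-DD HH:MM:SS.nnnnnnnnn', which the cuts trim down to HH:MM.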
timestamp=$(stat -f "%Sm" -t "%H:%M" "$log_file" 2>/dev/null || stat -c "%y" "$log_file" 2>/dev/null | cut -d' ' -f2 | cut -d':' -f1-2)
echo -e " ${GREEN}✓${NC} ${phase_label} (${timestamp})"
completed=$((completed + 1))
else
echo -e " ${RED}○${NC} ${phase_label}"
fi
done
# Progress bar
local pct=0
if [ $total -gt 0 ]; then
pct=$((completed * 100 / total))
fi
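# Render a 20-cell bar: each █ cell represents 5% of the pipeline.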
local filled=$((pct / 5))
local empty=$((20 - filled))
local bar=""
for ((i=0; i<filled; i++)); do bar+="█"; done
for ((i=0; i<empty; i++)); do bar+="░"; done
echo ""
echo -e " ${CYAN}Progress:${NC} [${bar}] ${pct}% (${completed}/${total} phases)"
echo ""
# Show latest log activity
local latest_log=""
local latest_time=0
for entry in "${PHASE_ARTIFACTS[@]}"; do
IFS=':' read -r agent_name _ _ <<< "$entry"
local log_file="$status_log_dir/${agent_name}.log"
if [ -f "$log_file" ]; then
local mtime
mtime=$(stat -f "%m" "$log_file" 2>/dev/null || stat -c "%Y" "$log_file" 2>/dev/null)
if [ "$mtime" -gt "$latest_time" ] 2>/dev/null; then
latest_time=$mtime
latest_log=$log_file
fi
fi
done
if [ -n "$latest_log" ]; then
echo -e "${YELLOW}Latest activity:${NC} $(basename "$latest_log" .log)"
echo -e "${YELLOW}Last 5 lines:${NC}"
tail -5 "$latest_log" 2>/dev/null | sed 's/^/ /'
echo ""
fi
# Ship decision if exists
if [ -f "$TASKS_DIR/ship-decision.md" ]; then
echo -e "${CYAN}━━━ Ship Decision ━━━${NC}"
head -5 "$TASKS_DIR/ship-decision.md" | sed 's/^/ /'
echo ""
fi
}
agent_manager() {
local artifacts=""
for entry in "${PHASE_ARTIFACTS[@]}"; do
IFS=':' read -r _ artifact_file phase_label <<< "$entry"
local artifact="$TASKS_DIR/$artifact_file"
if [ -f "$artifact" ]; then
artifacts+=" - $phase_label: $artifact (exists)"$'\n'
else
artifacts+=" - $phase_label: (not started)"$'\n'
fi
done
run_agent "00-manager" "$CYAN" "
You are a technical project manager who gives clear, concise progress reports.
You read fast, summarize well, and always give an honest completion percentage.
Your task:
1. Read the focus at $TASKS_DIR/focus.md to understand the goal.
2. Read ALL existing task artifacts to assess progress:
- $TASKS_DIR/next-ticket.md (planning output)
- $TASKS_DIR/dev-done.md (development summary)
- $TASKS_DIR/review-findings.md (code review)
- $TASKS_DIR/qa-report.md (QA results)
- $TASKS_DIR/ux-audit.md (UX audit)
- $TASKS_DIR/security-audit.md (security audit)
- $TASKS_DIR/architecture-review.md (architecture review)
- $TASKS_DIR/hacker-report.md (hacker findings)
- $TASKS_DIR/ship-decision.md (final verdict)
Artifact status precomputed from disk, for quick orientation:
$artifacts
3. Check git status and recent commits to see what code changes exist.
4. Run a quick test check: are tests passing? Any lint errors?
Write a progress report to stdout in this exact format:
## Progress Report
**Focus:** [the focus area]
**Overall Completion:** [X]%
**Status:** [ON TRACK / AT RISK / BLOCKED]
### Phases Completed
- [List each completed phase with a 1-line summary of outcome]
### Current Phase
- [What phase is in progress or next, what remains]
### Key Findings So Far
- [Top 3-5 findings across all reports — bugs found, issues fixed, blockers]
### Blockers & Risks
- [Any blockers or risks, or 'None' if clear]
### Recommended Next Step
- [Which agent to run next and why]
Be concise. The whole report should fit on one screen.
"
}
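# The manager is report-only: it prints to stdout (plus the usual run log)
# and makes no git checkpoint, unlike the fix/audit agents.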
# ─── Pipeline Orchestration ─────────────────────────────────────────────────
run_full_pipeline() {
echo -e "${GREEN}"
echo "╔══════════════════════════════════════════════════════════════╗"
echo "║ 🚀 Multi-Agent Development Pipeline ║"
echo "║ Date: $DATE_STAMP ║"
echo "║ ║"
echo "║ Arch → Plan → Dev → [Review↔Fix] → Arch → Audits → Verify║"
echo "╚══════════════════════════════════════════════════════════════╝"
echo -e "${NC}"
setup
register_pipeline
trap 'graceful_shutdown SIGINT' INT
trap 'graceful_shutdown SIGTERM' TERM
# Create feature branch
git checkout -b "$BRANCH_NAME" 2>/dev/null || git checkout "$BRANCH_NAME"
# ── Stage 0: Architect Directives ──
echo -e "${PURPLE}━━━ Stage 0: Architect Directives ━━━${NC}"
agent_architect_directives
# ── Stage 1: Plan ──
echo -e "${PURPLE}━━━ Stage 1: Planning ━━━${NC}"
agent_plan
# ── Stage 2: Build ──
echo -e "${BLUE}━━━ Stage 2: Development ━━━${NC}"
agent_dev
git_checkpoint "wip: raw implementation"
# ── Stage 2b: Validate backend checks pass ──
dev_validate_loop 3
# ── Stage 3: Review-Fix Loop (max 3 rounds) ──
echo -e "${RED}━━━ Stage 3: Review-Fix Loop ━━━${NC}"
review_fix_loop 3
# ── Stage 4: QA-Fix Loop (max 2 rounds) ──
echo -e "${CYAN}━━━ Stage 4: QA-Fix Loop ━━━${NC}"
qa_fix_loop 2
# ── Stage 5: Audit Sweep ──
echo -e "${GREEN}━━━ Stage 5: Audit Sweep ━━━${NC}"
echo ""
echo -e "${GREEN} ▸ UX Audit${NC}"
agent_ux
git_checkpoint "wip: ux improvements"
echo -e "${RED} ▸ Security Audit${NC}"
agent_security
git_checkpoint "wip: security fixes"
echo -e "${PURPLE} ▸ Architecture Review${NC}"
agent_architect_review
git_checkpoint "wip: architecture improvements"
echo -e "${YELLOW} ▸ Hacker Bug Hunt${NC}"
agent_hacker
git_checkpoint "wip: hacker fixes"
# ── Stage 6: Verify-Fix Loop (max 2 rounds) ──
echo -e "${RED}━━━ Stage 6: Final Verification ━━━${NC}"
verify_fix_loop 2
# ── Ship Decision ──
if ship_verdict_is_pass; then
echo -e "${GREEN}"
echo "╔══════════════════════════════════════════════════════════════╗"
echo "║ ✅ VERDICT: SHIP ║"
echo "╚══════════════════════════════════════════════════════════════╝"
echo -e "${NC}"
git_checkpoint "feat: $DATE_STAMP daily feature"
agent_update_spec
git_checkpoint "docs: update spec and changelog"
# Merge to main
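# origin/HEAD names the remote's default branch; strip the ref prefix to get
# its short name, and fall back to 'main' when origin/HEAD isn't set locally.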
MAIN_BRANCH=$(git symbolic-ref refs/remotes/origin/HEAD 2>/dev/null | sed 's@^refs/remotes/origin/@@' || echo "main")
git checkout "$MAIN_BRANCH"
git merge "$BRANCH_NAME" --no-ff -m "feat: ship $DATE_STAMP — $(head -1 "$TASKS_DIR/next-ticket.md" | sed 's/# //')"
echo -e "${GREEN}✓ Merged to $MAIN_BRANCH and shipped!${NC}"
else
echo -e "${RED}"
echo "╔══════════════════════════════════════════════════════════════╗"
echo "║ 🚫 VERDICT: NO-SHIP ║"
echo "╚══════════════════════════════════════════════════════════════╝"
echo -e "${NC}"
git_checkpoint "wip: blocked — see tasks/ship-decision.md"
echo -e "${YELLOW}Branch '$BRANCH_NAME' preserved with all work.${NC}"
echo -e "${YELLOW}Check $TASKS_DIR/ship-decision.md for details.${NC}"
echo -e "${YELLOW}Fix issues and run './agent.sh verify' to re-evaluate.${NC}"
fi
# Summary
echo ""
echo -e "${CYAN}━━━ Pipeline Summary ━━━${NC}"
echo -e "Logs: $LOG_DIR/"
echo -e "Ticket: $TASKS_DIR/next-ticket.md"
echo -e "Review: $TASKS_DIR/review-findings.md"
echo -e "QA: $TASKS_DIR/qa-report.md"
echo -e "UX: $TASKS_DIR/ux-audit.md"
echo -e "Security: $TASKS_DIR/security-audit.md"
echo -e "Arch: $TASKS_DIR/architecture-review.md"
echo -e "Hacker: $TASKS_DIR/hacker-report.md"
echo -e "Verdict: $TASKS_DIR/ship-decision.md"
echo ""
}
run_from_stage() {
local start_stage="$1"
# Stages map to the coordinator groups, not individual agents;
# single-agent names (fix/ux/security/hacker) are accepted as aliases.
local valid_stages="arch plan dev review fix qa audits ux security hacker verify"
echo -e "${GREEN}"
echo "╔══════════════════════════════════════════════════════════════╗"
echo "║ 🚀 Pipeline (from: $start_stage) ║"
echo "║ Date: $DATE_STAMP ║"
echo "║ ║"
echo "║ Arch → Plan → Dev → [Review↔Fix] → Arch → Audits → Verify║"
echo "╚══════════════════════════════════════════════════════════════╝"
echo -e "${NC}"
setup
register_pipeline
trap 'graceful_shutdown SIGINT' INT
trap 'graceful_shutdown SIGTERM' TERM
# Use existing branch or create one
local current_branch
current_branch=$(git branch --show-current)
if [[ "$current_branch" == "main" || "$current_branch" == "dev" ]]; then
git checkout -b "$BRANCH_NAME" 2>/dev/null || git checkout "$BRANCH_NAME"
fi
local found=0
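# Fall-through dispatch: once the requested stage matches, 'found' flips to 1
# and stays 1, so every later stage also runs in pipeline order.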
# Stage: arch (directives)
if [ "$found" -eq 1 ] || [ "$start_stage" = "arch" ]; then
found=1
echo -e "${PURPLE}━━━ Stage 0: Architect Directives ━━━${NC}"
agent_architect_directives
fi
# Stage: plan
if [ "$found" -eq 1 ] || [ "$start_stage" = "plan" ]; then
found=1
echo -e "${PURPLE}━━━ Stage 1: Planning ━━━${NC}"
agent_plan
fi
# Stage: dev
if [ "$found" -eq 1 ] || [ "$start_stage" = "dev" ]; then
found=1
echo -e "${BLUE}━━━ Stage 2: Development ━━━${NC}"
agent_dev
git_checkpoint "wip: raw implementation"
dev_validate_loop 3
fi
# Stage: review (review-fix loop)
if [ "$found" -eq 1 ] || [ "$start_stage" = "review" ] || [ "$start_stage" = "fix" ]; then
found=1
echo -e "${RED}━━━ Stage 3: Review-Fix Loop ━━━${NC}"
review_fix_loop 3
fi
# Stage: qa (qa-fix loop)
if [ "$found" -eq 1 ] || [ "$start_stage" = "qa" ]; then
found=1
echo -e "${CYAN}━━━ Stage 4: QA-Fix Loop ━━━${NC}"
qa_fix_loop 2
fi
# Stage: audits (ux, security, arch-review, hacker)
if [ "$found" -eq 1 ] || [ "$start_stage" = "audits" ] || \
[ "$start_stage" = "ux" ] || [ "$start_stage" = "security" ] || \
[ "$start_stage" = "hacker" ]; then
found=1
echo -e "${GREEN}━━━ Stage 5: Audit Sweep ━━━${NC}"
echo ""
echo -e "${GREEN} ▸ UX Audit${NC}"
agent_ux
git_checkpoint "wip: ux improvements"
echo -e "${RED} ▸ Security Audit${NC}"
agent_security
git_checkpoint "wip: security fixes"
echo -e "${PURPLE} ▸ Architecture Review${NC}"
agent_architect_review
git_checkpoint "wip: architecture improvements"
echo -e "${YELLOW} ▸ Hacker Bug Hunt${NC}"
agent_hacker
git_checkpoint "wip: hacker fixes"
fi
# Stage: verify (verify-fix loop)
if [ "$found" -eq 1 ] || [ "$start_stage" = "verify" ]; then
found=1
echo -e "${RED}━━━ Stage 6: Final Verification ━━━${NC}"
verify_fix_loop 2
fi
if [ "$found" -eq 0 ]; then
echo -e "${RED}Unknown stage: $start_stage${NC}"
echo "Valid stages: arch, plan, dev, review, qa, audits, ux, security, hacker, verify"
exit 1
fi
# Summary
echo ""
echo -e "${CYAN}━━━ Pipeline Complete (from $start_stage) ━━━${NC}"
echo -e "Logs: $LOG_DIR/"
echo ""
}
run_quick() {
echo -e "${GREEN}"
echo "╔══════════════════════════════════════════════════════════════╗"
echo "║ ⚡ Quick 3-Agent Loop (Build → Review → Fix) ║"
echo "╚══════════════════════════════════════════════════════════════╝"
echo -e "${NC}"
setup
register_pipeline
trap 'graceful_shutdown SIGINT' INT
trap 'graceful_shutdown SIGTERM' TERM
agent_dev
git_checkpoint "wip: implementation"
dev_validate_loop 3
review_fix_loop 2
echo -e "${GREEN}✓ Quick cycle complete. Run './agent.sh from qa' for deeper checks.${NC}"
}
# ─── Interactive Mode ────────────────────────────────────────────────────────
prompt_for_focus() {
# All display output goes to stderr so command substitution only captures the choice
echo -e "${CYAN}" >&2
echo "╔══════════════════════════════════════════════════════════════╗" >&2
echo "║ 🤖 Multi-Agent Development Pipeline ║" >&2
echo "╚══════════════════════════════════════════════════════════════╝" >&2
echo -e "${NC}" >&2
echo "" >&2
echo -e "${YELLOW}What would you like to work on today?${NC}" >&2
echo "" >&2
echo "Examples:" >&2
echo " - Fix security vulnerabilities from last review" >&2
echo " - Implement user authentication feature" >&2
echo " - Improve performance of the dashboard" >&2
echo " - Add tests for the payment flow" >&2
echo " - Review and refactor the API layer" >&2
echo "" >&2
echo -n "Your focus: " >&2
read -r focus_input
echo "" >&2
if [ -z "$focus_input" ]; then
echo -e "${RED}Focus cannot be empty.${NC}" >&2
exit 1
fi
# Save focus to file for agents to reference (setup hasn't run yet at this
# point, so make sure the tasks directory exists first)
mkdir -p "$TASKS_DIR"
cat > "$TASKS_DIR/focus.md" << EOF
# Current Focus
**Date:** $DATE_STAMP
**Focus:** $focus_input
## Context
This focus was set at the start of the agent pipeline run. All agents should
prioritize work related to this focus area.
## Priority
Tasks and issues directly related to this focus should be addressed first.
Other improvements can be made, but the primary goal is to make progress on
this specific focus area.
EOF
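# focus.md is read back by show_status, agent_manager, and the focus-aware
# agents (e.g. the hacker agent gives the focus area extra scrutiny).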
echo -e "${GREEN}✓ Focus saved to $TASKS_DIR/focus.md${NC}" >&2
echo "" >&2
echo "Select an agent or pipeline to run:" >&2
echo "" >&2
echo -e "${GREEN} 1${NC}) full - Run the complete pipeline (Arch → Plan → Dev → Review → Arch → Audits → Verify)" >&2
echo -e "${GREEN} 2${NC}) quick - Run the minimal 3-agent loop (build → review → fix)" >&2
echo -e "${GREEN} 3${NC}) arch - Run only the architect agent (writes directives)" >&2
echo -e "${GREEN} 4${NC}) plan - Run only the product planning agent" >&2
echo -e "${GREEN} 5${NC}) dev - Run only the development agent" >&2
echo -e "${GREEN} 6${NC}) review - Run only the code review agent" >&2
echo -e "${GREEN} 7${NC}) fix - Run only the fix agent" >&2
echo -e "${GREEN} 8${NC}) qa - Deep coverage mode: hunt untested code across codebase" >&2
echo -e "${GREEN} 9${NC}) ux - Run only the UX audit agent" >&2
echo -e "${GREEN}10${NC}) security - Run only the security audit agent" >&2
echo -e "${GREEN}11${NC}) hacker - Run only the hacker/bug-hunter agent" >&2
echo -e "${GREEN}12${NC}) verify - Run only the final verification gate" >&2
echo "" >&2
echo -e "${CYAN}13${NC}) status - Show pipeline progress (instant, no AI)" >&2
echo -e "${CYAN}14${NC}) manager - AI progress report with completion %" >&2
echo "" >&2
echo -e "${RED}15${NC}) abort - Gracefully stop a running pipeline" >&2
echo "" >&2
echo -n "Enter your choice (1-15 or name): " >&2
read -r choice
echo "" >&2
case "$choice" in
1|full) echo "full" ;;
2|quick) echo "quick" ;;
3|arch) echo "arch" ;;
4|plan) echo "plan" ;;
5|dev) echo "dev" ;;
6|review) echo "review" ;;
7|fix) echo "fix" ;;
8|qa) echo "qa" ;;
9|ux) echo "ux" ;;
10|security) echo "security" ;;
11|hacker) echo "hacker" ;;
12|verify) echo "verify" ;;
13|status) echo "status" ;;
14|manager) echo "manager" ;;
15|abort) echo "abort" ;;
*)
echo -e "${RED}Invalid choice: $choice${NC}" >&2
echo "Run './agent.sh help' for usage." >&2
exit 1
;;
esac
}
# ─── CLI Router ──────────────────────────────────────────────────────────────
# If no argument provided, prompt for focus
if [ $# -eq 0 ]; then
COMMAND=$(prompt_for_focus)
SUBCOMMAND=""
else
COMMAND="$1"
SUBCOMMAND="${2:-}"
fi
# Helper to register + trap for single-agent CLI runs
run_single_agent() {
setup
register_pipeline
trap 'graceful_shutdown SIGINT' INT
trap 'graceful_shutdown SIGTERM' TERM
}
case "$COMMAND" in
arch) run_single_agent && agent_architect_directives ;;
plan) run_single_agent && agent_plan ;;
dev) run_single_agent && agent_dev && git_checkpoint "wip: dev agent" && dev_validate_loop 3 ;;
review) run_single_agent && agent_review ;;
fix) run_single_agent && agent_fix && git_checkpoint "wip: fixes applied" ;;
qa) run_single_agent && agent_qa_deep && git_checkpoint "wip: qa deep coverage" ;;
ux) run_single_agent && agent_ux && git_checkpoint "wip: ux improvements" ;;
security) run_single_agent && agent_security && git_checkpoint "wip: security fixes" ;;
hacker) run_single_agent && agent_hacker && git_checkpoint "wip: hacker fixes" ;;
verify) run_single_agent && agent_verify ;;
quick) run_quick ;;
full) run_full_pipeline ;;
from)
if [ -z "$SUBCOMMAND" ]; then
echo -e "${RED}Usage: ./agent.sh from <stage>${NC}"
echo "Stages: arch, plan, dev, review, fix, qa, ux, security, hacker, verify"
exit 1
fi
run_from_stage "$SUBCOMMAND"
;;
abort) do_abort ;;
status) show_status ;;
manager) setup && agent_manager ;;
help|-h|--help)
echo "Usage: ./agent.sh [command]"
echo ""
echo "Pipeline:"
echo " full Run the full pipeline with coordinator loops"
echo " from <stage> Resume pipeline from a stage (arch/plan/dev/review/qa/audits/verify)"
echo " quick Quick loop: dev → [review↔fix]"
echo ""
echo " Flow: Arch → Plan → Dev → [Review↔Fix x3] → Arch → Audits → [Verify↔Fix x2]"
echo ""
echo "Single agents:"
echo " arch Architect — writes directives (pre-dev) or reviews (post-dev)"
echo " plan Product planning agent"
echo " dev Development agent"
echo " review Code review agent"
echo " fix Fix agent"
echo " qa QA deep coverage (standalone: hunts untested code across codebase)"
echo " ux UX audit agent"
echo " security Security audit agent"
echo " hacker Hacker/bug-hunter agent"
echo " verify Final verification gate"
echo ""
echo "Control:"
echo " abort Gracefully stop the running pipeline"
echo " status Show pipeline progress (instant, no AI)"
echo " manager AI progress report with completion %"
echo " help Show this help message"
echo ""
echo "If no command is provided, an interactive menu will be shown."
;;
*)
echo -e "${RED}Unknown command: $COMMAND${NC}"
echo "Run './agent.sh help' for usage."
exit 1
;;
esac