@Als-Pal
Last active March 10, 2026 18:15
Quicken Amazon Transaction Matcher — auto-enrich Quicken transactions with Amazon order item names
#!/usr/bin/env python3
"""
Quicken ↔ Amazon Order History Matcher
========================================

Enriches Quicken transaction memos with Amazon product names by matching
transactions to Amazon order history exports using date + amount heuristics.
Works with Quicken's Core Data SQLite backend (macOS .quicken package).

Usage:
    1. Export your Amazon order history from amazon.com/your-orders
       (or use a browser extension / Amazon data request to get Order History CSV)
    2. Configure the paths and payee names in the CONFIG section below
    3. Run: python3 quicken_amazon_matcher.py --scan          # preview matches
            python3 quicken_amazon_matcher.py --apply HIGH    # apply HIGH certainty
            python3 quicken_amazon_matcher.py --apply MED     # apply HIGH + MED
            python3 quicken_amazon_matcher.py --apply ALL     # apply everything matched
    4. Always creates a backup before writing. Restart Quicken to see changes.

Requirements:
    - Python 3.7+
    - No external packages needed (stdlib only: sqlite3, csv, datetime, argparse)

How Matching Works:
    - For purchases (debits): finds Amazon orders within ±14 days at the same amount
    - For returns (credits): searches backwards up to 120 days for the original purchase
    - Multi-item orders: sums line items per Order ID to match bundled transactions
    - Confidence tiers:
        HIGH = single match within 7 days (safe to auto-apply)
        MED  = single match within 8-30 days, or close multi-match
        LOW  = multiple candidates at same amount (needs manual review)

Core Data Notes:
    - Quicken uses Apple Core Data (SQLite) with Z_PK, Z_ENT, Z_OPT columns
    - Z_OPT is an optimistic locking counter: MUST increment on every write
    - Date epoch is 2001-01-01 (add 978307200 for Unix timestamp)
    - Close Quicken BEFORE running writes; reopen after to pick up changes

Author: Written with Claude (Anthropic) in an interactive Quicken enrichment project
License: MIT
"""
import sqlite3
import csv
import os
import sys
import shutil
import argparse
from datetime import datetime, timedelta
from collections import defaultdict
# ============================================================================
# CONFIG — Edit these for your setup
# ============================================================================

# Path to the Quicken SQLite database inside your .quicken package.
# macOS default: ~/Library/Application Support/Quicken/Documents/<name>.quicken/data
# Quicken Beta uses "Quicken Beta" instead of "Quicken" in the path.
QUICKEN_DB_PATH = os.path.expanduser(
    "~/Library/Application Support/Quicken/Documents/My Finances.quicken/data"
)

# Path to the Amazon Order History CSV.
# You can request this from Amazon (Your Account → Download order reports)
# or use a browser extension that exports order history.
# Expected columns: Order Date, Order ID, Product Name, Total Amount
AMAZON_CSV_PATH = os.path.expanduser(
    "~/Downloads/Amazon Order History.csv"
)

# Payee names in Quicken that correspond to Amazon transactions.
# Run the discovery query below if you're not sure which names to use:
#   SELECT DISTINCT up.ZNAME FROM ZTRANSACTION t
#   JOIN ZUSERPAYEE up ON t.ZUSERPAYEE = up.Z_PK
#   WHERE up.ZNAME LIKE '%amazon%' OR up.ZNAME LIKE '%amzn%'
AMAZON_PAYEE_NAMES = [
    "Amazon.com",
    "Amazon",
    "AMZN Mktp US",
    "Amazon Marketplace",
    "Amazon.com*",
    "AMAZON.COM*AMZN.COM/BILL",
]

# Transaction PKs to always skip (e.g., multi-item orders you'll split manually,
# Amazon Prime memberships you want to label yourself, etc.)
SKIP_PKS = set()

# Matching parameters (tune if needed)
DEBIT_WINDOW_NARROW = 14   # days: first-pass window for purchases
DEBIT_WINDOW_WIDE = 30     # days: expanded window for purchases (MED/LOW)
CREDIT_WINDOW = 120        # days: how far back to look for original purchase
HIGH_THRESHOLD = 7         # days: max distance for HIGH certainty
MED_THRESHOLD = 30         # days: max distance for MED certainty
MEMO_MAX_LENGTH = 500      # truncate memos longer than this

# Core Data epoch offset (2001-01-01 in Unix time)
CORE_DATA_EPOCH = 978307200

# ============================================================================
# END CONFIG
# ============================================================================

def parse_amazon_date(date_str):
    """Parse Amazon CSV date, handling multiple formats."""
    date_str = date_str.strip()
    # ISO format: 2024-01-15T19:18:06Z
    if "T" in date_str:
        date_str = date_str[:10]
    # Standard: 2024-01-15 or 01/15/2024
    for fmt in ["%Y-%m-%d", "%m/%d/%Y", "%m/%d/%y", "%d/%m/%Y"]:
        try:
            return datetime.strptime(date_str, fmt)
        except ValueError:
            continue
    return None

def parse_amazon_amount(amount_str):
    """Parse Amazon CSV amount, handling currency symbols and commas."""
    cleaned = amount_str.strip().replace("$", "").replace(",", "").replace("£", "").replace("€", "")
    try:
        return round(float(cleaned), 2)
    except ValueError:
        return None

def load_amazon_orders(csv_path):
    """Load and parse the Amazon order history CSV."""
    orders = []
    with open(csv_path, "r", encoding="utf-8-sig") as f:
        reader = csv.DictReader(f)
        for row in reader:
            # Skip cancelled orders if status column exists
            status = row.get("Order Status", "").strip()
            if status.lower() == "cancelled":
                continue
            date = parse_amazon_date(row.get("Order Date", ""))
            amount = parse_amazon_amount(row.get("Total Amount", "0"))
            name = row.get("Product Name", "").strip()
            order_id = row.get("Order ID", "").strip()
            if date and amount and amount > 0 and name:
                orders.append({
                    "date": date,
                    "amount": amount,
                    "name": name,
                    "order_id": order_id,
                })
    return orders

def build_order_totals(orders):
    """Group orders by Order ID and compute totals for multi-item matching."""
    groups = defaultdict(list)
    for o in orders:
        groups[o["order_id"]].append(o)
    totals = {}
    for oid, items in groups.items():
        total = round(sum(i["amount"] for i in items), 2)
        names = [i["name"] for i in items]
        totals[oid] = {
            "total": total,
            "names": names,
            "date": items[0]["date"],
            "count": len(items),
        }
    return totals

def resolve_payee_pks(cursor, payee_names):
    """Look up Z_PK values for the given payee names."""
    pks = set()
    for name in payee_names:
        cursor.execute(
            "SELECT Z_PK FROM ZUSERPAYEE WHERE ZNAME = ? OR ZNAME LIKE ?",
            (name, name),
        )
        for row in cursor.fetchall():
            pks.add(row[0])
    if not pks:
        # Fallback: search broadly
        print("WARNING: No exact payee matches found. Trying broad search...")
        cursor.execute(
            "SELECT Z_PK, ZNAME FROM ZUSERPAYEE WHERE ZNAME LIKE '%amazon%' OR ZNAME LIKE '%amzn%'"
        )
        for row in cursor.fetchall():
            print(f"  Found payee: PK={row[0]} Name='{row[1]}'")
            pks.add(row[0])
    return pks

def get_empty_memo_transactions(cursor, payee_pks, include_split_memo=False):
    """
    Fetch Amazon transactions that have no memo.

    If include_split_memo=False (default), also excludes transactions that have
    memos at the split/category level (ZCASHFLOWTRANSACTIONENTRY.ZNOTE), since
    those are already documented within their splits.
    """
    pk_list = ",".join(str(p) for p in payee_pks)
    if include_split_memo:
        # Simple: just get everything with empty transaction-level memo
        cursor.execute(f"""
            SELECT Z_PK, ZENTEREDDATE, ZAMOUNT
            FROM ZTRANSACTION
            WHERE ZUSERPAYEE IN ({pk_list})
              AND (ZNOTE IS NULL OR ZNOTE = '')
            ORDER BY ZENTEREDDATE
        """)
    else:
        # Exclude transactions that have split-level memos
        cursor.execute(f"""
            SELECT t.Z_PK, t.ZENTEREDDATE, t.ZAMOUNT
            FROM ZTRANSACTION t
            WHERE t.ZUSERPAYEE IN ({pk_list})
              AND (t.ZNOTE IS NULL OR t.ZNOTE = '')
              AND NOT EXISTS (
                  SELECT 1 FROM ZCASHFLOWTRANSACTIONENTRY cfte
                  WHERE cfte.ZPARENT = t.Z_PK
                    AND cfte.ZNOTE IS NOT NULL AND cfte.ZNOTE != ''
              )
            ORDER BY t.ZENTEREDDATE
        """)
    return cursor.fetchall()

def match_single_item(tx_date, tx_amount, is_credit, orders):
    """Find single-item Amazon orders matching this transaction."""
    candidates = []
    for o in orders:
        if abs(o["amount"] - tx_amount) > 0.01:
            continue
        if is_credit:
            days = (tx_date - o["date"]).days
            if 0 <= days <= CREDIT_WINDOW:
                candidates.append((days, o["name"], o["order_id"]))
        else:
            days = abs((tx_date - o["date"]).days)
            if days <= DEBIT_WINDOW_WIDE:
                candidates.append((days, o["name"], o["order_id"]))
    candidates.sort(key=lambda x: x[0])
    return candidates

def match_multi_item(tx_date, tx_amount, is_credit, order_totals):
    """Find multi-item Amazon orders whose total matches this transaction."""
    candidates = []
    for oid, info in order_totals.items():
        if abs(info["total"] - tx_amount) > 0.01:
            continue
        if is_credit:
            days = (tx_date - info["date"]).days
            if 0 <= days <= CREDIT_WINDOW:
                combined = "; ".join(n[:60] for n in info["names"])
                candidates.append((days, combined, oid))
        else:
            days = abs((tx_date - info["date"]).days)
            if days <= DEBIT_WINDOW_NARROW:
                combined = "; ".join(n[:60] for n in info["names"])
                candidates.append((days, combined, oid))
    candidates.sort(key=lambda x: x[0])
    return candidates

def classify_match(candidates):
    """
    Given sorted candidates, return (certainty, best_name, all_candidates).
    Certainty: 'HIGH', 'MED', 'LOW', or None if no candidates.
    """
    if not candidates:
        return None, None, []
    best_days, best_name, best_oid = candidates[0]
    # Deduplicate by product name (different sizes/colors of same item)
    unique_names = set()
    for _, name, _ in candidates:
        unique_names.add(name[:80])
    unique_orders = set()
    for _, _, oid in candidates:
        unique_orders.add(oid)
    n_unique = len(unique_names)
    if n_unique == 1 and best_days <= HIGH_THRESHOLD:
        return "HIGH", best_name, candidates
    elif n_unique == 1 and best_days <= MED_THRESHOLD:
        return "MED", best_name, candidates
    elif n_unique == 1:
        return "LOW", best_name, candidates
    elif len(unique_orders) == 1 and best_days <= HIGH_THRESHOLD:
        # Multiple items from same order
        return "MED", best_name, candidates
    elif best_days <= HIGH_THRESHOLD:
        # Multiple candidates but very close date
        if len(candidates) >= 2 and candidates[1][0] > best_days + 7:
            return "MED", best_name, candidates  # clear winner by distance
        return "LOW", best_name, candidates
    else:
        return "LOW", best_name, candidates

def run_matching(cursor, payee_pks, orders, order_totals):
    """Run the full matching pipeline. Returns list of match results."""
    txns = get_empty_memo_transactions(cursor, payee_pks)
    print(f"Found {len(txns)} truly-empty Amazon transactions to match")
    results = []
    for pk, entered_date, amount in txns:
        if pk in SKIP_PKS:
            continue
        is_credit = amount > 0
        tx_amount = round(abs(amount), 2)
        tx_date = datetime.utcfromtimestamp(entered_date + CORE_DATA_EPOCH)
        # Try single-item match first
        candidates = match_single_item(tx_date, tx_amount, is_credit, orders)
        # If no single-item match, try multi-item order totals
        if not candidates:
            candidates = match_multi_item(tx_date, tx_amount, is_credit, order_totals)
        certainty, best_name, all_candidates = classify_match(candidates)
        # Build memo
        memo = None
        if best_name:
            memo = best_name
            if is_credit and not memo.startswith("Return:"):
                memo = "Return: " + memo
            if len(memo) > MEMO_MAX_LENGTH:
                memo = memo[: MEMO_MAX_LENGTH - 3] + "..."
        results.append({
            "pk": pk,
            "date": tx_date.strftime("%Y-%m-%d"),
            "amount": tx_amount,
            "is_credit": is_credit,
            "certainty": certainty,
            "memo": memo,
            "days": candidates[0][0] if candidates else None,
            "candidate_count": len(set(c[1][:80] for c in all_candidates)) if all_candidates else 0,
        })
    return results

def print_results(results):
    """Print a summary of matching results."""
    by_cert = defaultdict(list)
    for r in results:
        by_cert[r["certainty"] or "NONE"].append(r)
    total = len(results)
    print(f"\n{'='*80}")
    print(f" MATCHING RESULTS: {total} transactions")
    print(f"{'='*80}")
    for level in ["HIGH", "MED", "LOW", "NONE"]:
        group = by_cert.get(level, [])
        pct = f"({100*len(group)/total:.0f}%)" if total else ""
        print(f"  {level:>4}: {len(group):>4} {pct}")
    for level in ["HIGH", "MED", "LOW", "NONE"]:
        group = by_cert.get(level, [])
        if not group:
            continue
        print(f"\n{'─'*80}")
        print(f" {level} ({len(group)})")
        print(f"{'─'*80}")
        for r in group:
            cr = "CR" if r["is_credit"] else "DB"
            if r["memo"]:
                days_str = f"[{r['days']}d, {r['candidate_count']}cand]"
                print(f"  PK={r['pk']:>6} | {r['date']} | ${r['amount']:>8.2f} {cr} | {r['memo'][:65]} {days_str}")
            else:
                print(f"  PK={r['pk']:>6} | {r['date']} | ${r['amount']:>8.2f} {cr} | (no match)")

def apply_updates(db_path, results, min_certainty="HIGH"):
    """
    Apply matched memos to the Quicken database.

    min_certainty: 'HIGH' (safest), 'MED', or 'ALL' (includes LOW)
    """
    levels = {"HIGH"}
    if min_certainty in ("MED", "ALL"):
        levels.add("MED")
    if min_certainty == "ALL":
        levels.add("LOW")
    to_apply = [r for r in results if r["certainty"] in levels and r["memo"]]
    if not to_apply:
        print("No updates to apply at this certainty level.")
        return
    # Create backup
    backup_path = db_path + f".backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    print(f"Creating backup: {backup_path}")
    shutil.copy2(db_path, backup_path)
    conn = sqlite3.connect(db_path)
    c = conn.cursor()
    applied = 0
    skipped = 0
    for r in to_apply:
        # Safety check: verify the memo is still empty
        c.execute("SELECT ZNOTE FROM ZTRANSACTION WHERE Z_PK = ?", (r["pk"],))
        row = c.fetchone()
        if row is None:
            print(f"  WARNING: PK={r['pk']} not found, skipping")
            skipped += 1
            continue
        if row[0] and row[0].strip():
            print(f"  WARNING: PK={r['pk']} already has memo, skipping")
            skipped += 1
            continue
        # Write the memo and increment Z_OPT (Core Data optimistic lock)
        c.execute(
            "UPDATE ZTRANSACTION SET ZNOTE = ?, Z_OPT = Z_OPT + 1 WHERE Z_PK = ?",
            (r["memo"], r["pk"]),
        )
        applied += 1
    conn.commit()
    conn.close()
    print(f"\nApplied {applied} updates ({skipped} skipped)")
    print(f"Backup saved to: {backup_path}")
    print("Restart Quicken to see changes.")

def print_coverage(cursor, payee_pks):
    """Print current memo coverage statistics."""
    pk_list = ",".join(str(p) for p in payee_pks)
    cursor.execute(f"SELECT COUNT(*) FROM ZTRANSACTION WHERE ZUSERPAYEE IN ({pk_list})")
    total = cursor.fetchone()[0]
    cursor.execute(f"""
        SELECT COUNT(*) FROM ZTRANSACTION
        WHERE ZUSERPAYEE IN ({pk_list})
          AND ZNOTE IS NOT NULL AND ZNOTE != ''
    """)
    with_memo = cursor.fetchone()[0]
    cursor.execute(f"""
        SELECT COUNT(DISTINCT t.Z_PK) FROM ZTRANSACTION t
        JOIN ZCASHFLOWTRANSACTIONENTRY cfte ON cfte.ZPARENT = t.Z_PK
        WHERE t.ZUSERPAYEE IN ({pk_list})
          AND (t.ZNOTE IS NULL OR t.ZNOTE = '')
          AND cfte.ZNOTE IS NOT NULL AND cfte.ZNOTE != ''
    """)
    split_memo = cursor.fetchone()[0]
    empty = total - with_memo
    truly_empty = empty - split_memo
    print(f"\n{'='*50}")
    print(f" COVERAGE REPORT")
    print(f"{'='*50}")
    print(f" Total Amazon transactions:   {total:>5}")
    print(f" With transaction-level memo: {with_memo:>5} ({100*with_memo/total:.1f}%)")
    print(f" Empty transaction-level:     {empty:>5}")
    print(f"   ├─ With split-level memos: {split_memo:>5} (already documented)")
    print(f"   └─ Truly empty:            {truly_empty:>5}")
    print(f" Effective coverage:          {with_memo+split_memo:>5}/{total} ({100*(with_memo+split_memo)/total:.1f}%)")

def main():
    parser = argparse.ArgumentParser(
        description="Match Quicken Amazon transactions to Amazon order history",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --scan               Preview all matches without writing anything
  %(prog)s --apply HIGH         Apply only HIGH certainty matches (safest)
  %(prog)s --apply MED          Apply HIGH + MED certainty matches
  %(prog)s --apply ALL          Apply all matches including LOW (review first!)
  %(prog)s --coverage           Show current memo coverage statistics
  %(prog)s --discover-payees    List Amazon-related payees in your Quicken DB
""",
    )
    parser.add_argument("--scan", action="store_true", help="Scan and preview matches")
    parser.add_argument("--apply", choices=["HIGH", "MED", "ALL"], help="Apply matches at given certainty level")
    parser.add_argument("--coverage", action="store_true", help="Show current coverage stats")
    parser.add_argument("--discover-payees", action="store_true", help="Find Amazon payee names in DB")
    parser.add_argument("--include-split-memo", action="store_true",
                        help="Include transactions that already have split-level memos")
    args = parser.parse_args()
    if not any([args.scan, args.apply, args.coverage, args.discover_payees]):
        parser.print_help()
        sys.exit(1)
    # Validate paths
    if not os.path.exists(QUICKEN_DB_PATH):
        print(f"ERROR: Quicken database not found at:\n  {QUICKEN_DB_PATH}")
        print("Edit QUICKEN_DB_PATH in the CONFIG section of this script.")
        sys.exit(1)
    if args.discover_payees:
        conn = sqlite3.connect(QUICKEN_DB_PATH)
        c = conn.cursor()
        c.execute("""
            SELECT up.Z_PK, up.ZNAME, COUNT(t.Z_PK) as txn_count
            FROM ZUSERPAYEE up
            JOIN ZTRANSACTION t ON t.ZUSERPAYEE = up.Z_PK
            WHERE up.ZNAME LIKE '%amazon%' OR up.ZNAME LIKE '%amzn%'
               OR up.ZNAME LIKE '%Amazon%' OR up.ZNAME LIKE '%AMZN%'
            GROUP BY up.Z_PK
            ORDER BY txn_count DESC
        """)
        print("\nAmazon-related payees in your Quicken DB:")
        print(f"{'PK':>6} {'Transactions':>12} Name")
        print(f"{'─'*6} {'─'*12} {'─'*40}")
        for pk, name, count in c.fetchall():
            print(f"{pk:>6} {count:>12} {name}")
        conn.close()
        print("\nAdd these names to AMAZON_PAYEE_NAMES in the CONFIG section.")
        return
    conn = sqlite3.connect(QUICKEN_DB_PATH)
    c = conn.cursor()
    # Resolve payee PKs
    payee_pks = resolve_payee_pks(c, AMAZON_PAYEE_NAMES)
    if not payee_pks:
        print("ERROR: No matching payees found. Run --discover-payees first.")
        conn.close()
        sys.exit(1)
    print(f"Found {len(payee_pks)} Amazon payee entries")
    if args.coverage:
        print_coverage(c, payee_pks)
        conn.close()
        return
    # Load Amazon CSV
    if not os.path.exists(AMAZON_CSV_PATH):
        print(f"ERROR: Amazon CSV not found at:\n  {AMAZON_CSV_PATH}")
        print("Edit AMAZON_CSV_PATH in the CONFIG section of this script.")
        conn.close()
        sys.exit(1)
    print(f"Loading Amazon orders from: {AMAZON_CSV_PATH}")
    orders = load_amazon_orders(AMAZON_CSV_PATH)
    order_totals = build_order_totals(orders)
    print(f"  Loaded {len(orders)} order lines, {len(order_totals)} unique orders")
    if not orders:
        print("ERROR: No orders loaded. Check CSV format (needs: Order Date, Order ID, Product Name, Total Amount)")
        conn.close()
        sys.exit(1)
    # Show date range
    dates = [o["date"] for o in orders]
    print(f"  Date range: {min(dates).strftime('%Y-%m-%d')} to {max(dates).strftime('%Y-%m-%d')}")
    # Run matching
    results = run_matching(c, payee_pks, orders, order_totals)
    print_results(results)
    print_coverage(c, payee_pks)
    if args.apply:
        print(f"\n>>> Applying {args.apply} certainty matches...")
        conn.close()  # Close read connection before write
        apply_updates(QUICKEN_DB_PATH, results, args.apply)
    else:
        conn.close()
        print("\nRun with --apply HIGH to apply the safest matches.")


if __name__ == "__main__":
    main()

Quicken ↔ Amazon Order History Matcher — AI Context Guide

What This Is

This file is a context document you can give to an AI coding assistant (Claude, ChatGPT, Copilot, etc.) so it understands the Quicken Amazon Matcher project and can help you set it up, troubleshoot it, or extend it.

How to use this file: Copy its contents into a conversation with your AI assistant, or upload it as an attachment. Then ask your question. The AI will have everything it needs to give you specific, accurate help.


The Problem We're Solving

When you buy things on Amazon and they show up in Quicken (macOS), the transactions just say "Amazon.com" or "AMZN Mktp US" with no indication of what you bought. If you have hundreds or thousands of Amazon transactions, it's impossible to know what any of them were for without logging into Amazon and cross-referencing manually.

Amazon lets you export your complete order history as a CSV file. That CSV contains the product name, order date, and amount for everything you've ever purchased.

This project matches those two data sources — Quicken transactions and Amazon order history — by date and dollar amount, then writes the actual product names into Quicken's memo field automatically.


What You Need Before Starting

1. Your Amazon Order History CSV

You need a CSV file with your Amazon order history. There are a few ways to get this:

  • Amazon's built-in export: Go to amazon.com → Your Account → Download order reports. This gives you a CSV but may be limited in date range.
  • Request Your Data: Go to amazon.com → Your Account → Request Your Data → Select "Your Orders." Amazon will email you a download link within a few days. This is the most complete option.
  • Browser extensions: There are extensions like "Amazon Order History Reporter" that can export your history. These vary in reliability.

The CSV must have these columns (the names must match exactly):

  • Order Date — when the order was placed (e.g., 2024-01-15 or 2024-01-15T19:18:06Z)
  • Order ID — Amazon's order identifier (e.g., 113-1234567-8901234)
  • Product Name — the item name
  • Total Amount — the price charged (e.g., 29.99 or $29.99)

An optional but helpful column:

  • Order Status — if present, cancelled orders are automatically skipped
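For concreteness, here is a minimal two-row sample in that layout (every value below is invented), together with a quick header check you can run on your real file before feeding it to the script:

```python
import csv
import io

REQUIRED = {"Order Date", "Order ID", "Product Name", "Total Amount"}

# Hypothetical rows in the expected layout; the Order ID and product names are made up.
sample = """Order Date,Order ID,Product Name,Total Amount,Order Status
2024-01-15,113-1234567-8901234,USB-C Charging Cable 6ft,12.99,Shipped
2024-01-15T19:18:06Z,113-1234567-8901234,Paperback Book,$15.00,Shipped
"""

reader = csv.DictReader(io.StringIO(sample))
missing = REQUIRED - set(reader.fieldnames)
print("missing columns:", missing or "none")  # → missing columns: none
```

To check your own export, replace `io.StringIO(sample)` with `open("your file.csv", encoding="utf-8-sig")`.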

2. Your Quicken Database Path

On macOS, Quicken stores its data as a SQLite database inside a .quicken package. The typical path is:

~/Library/Application Support/Quicken/Documents/YOUR_FILE_NAME.quicken/data

If you use Quicken Beta, the path uses "Quicken Beta" instead of "Quicken":

~/Library/Application Support/Quicken Beta/Documents/YOUR_FILE_NAME.quicken/data

To find your exact path:

  1. Open Finder
  2. Hold Option and click the Go menu → Library
  3. Navigate to: Application Support → Quicken (or Quicken Beta) → Documents
  4. You'll see a file ending in .quicken — that's a package
  5. Right-click it → Show Package Contents
  6. Inside you'll find a file called data — that's the SQLite database

Or ask your AI assistant to run:

find ~/Library/Application\ Support -name "data" -path "*.quicken/*" 2>/dev/null

3. Python 3

The script requires Python 3.7 or later. macOS usually has this pre-installed. Check by running:

python3 --version

No additional packages need to be installed — the script uses only Python's standard library.


How the Quicken Database Works

Understanding this section isn't required to use the script, but it's essential context if the AI needs to troubleshoot or modify anything.

Core Data / SQLite

Quicken on macOS uses Apple's Core Data framework, which stores its data as a SQLite database. This means:

  • Every table name starts with Z (e.g., ZTRANSACTION, ZUSERPAYEE)
  • Every table has special columns:
    • Z_PK — the primary key (unique ID for each row)
    • Z_ENT — entity type identifier (which "kind" of thing this row represents)
    • Z_OPT — optimistic locking counter (critical for writes — see below)

Key Tables

ZTRANSACTION — One row per transaction (the main table)
├── Z_PK                — Unique identifier
├── ZENTEREDDATE        — Transaction date (Core Data epoch, see below)
├── ZAMOUNT             — Amount (negative = debit/purchase, positive = credit/return)
├── ZNOTE               — The memo/note field (this is what we write to)
├── ZUSERPAYEE          — Foreign key → ZUSERPAYEE.Z_PK (who you paid)
└── Z_OPT               — Version counter (MUST increment on writes)

ZUSERPAYEE — One row per payee name
├── Z_PK                — Unique identifier
└── ZNAME               — Payee name (e.g., "Amazon.com", "AMZN Mktp US")

ZCASHFLOWTRANSACTIONENTRY — Split lines within a transaction
├── Z_PK                — Unique identifier
├── ZPARENT             — Foreign key → ZTRANSACTION.Z_PK
├── ZAMOUNT             — Split amount
├── ZCATEGORYTAG        — Foreign key → ZTAG.Z_PK (category)
└── ZNOTE               — Split-level memo (may have product info even when
                           the transaction-level memo is empty)

ZTAG — Categories and tags
├── Z_PK                — Unique identifier
└── ZNAME               — Tag/category name (e.g., "Groceries", "Recreation")

Critical: The Z_OPT Column

Every time you update a row, you must increment Z_OPT by 1. This is Core Data's optimistic locking mechanism. The correct update pattern is:

UPDATE ZTRANSACTION SET ZNOTE = 'your memo', Z_OPT = Z_OPT + 1 WHERE Z_PK = 12345

If you forget to increment Z_OPT, Quicken may:

  • Silently discard your changes on next launch
  • Crash or show a Core Data conflict error
  • Corrupt the optimistic lock state for that row

Critical: The Date Epoch

Core Data stores dates as seconds since January 1, 2001 (not 1970 like Unix). To convert:

unix_timestamp = core_data_timestamp + 978307200

Or in SQL:

date(ZENTEREDDATE + 978307200, 'unixepoch')  -- gives you a YYYY-MM-DD string
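The same conversion in Python, as a standalone sketch (the script itself does the equivalent with `datetime.utcfromtimestamp`):

```python
from datetime import datetime, timezone

CORE_DATA_EPOCH = 978307200  # seconds between 1970-01-01 and 2001-01-01 (UTC)

def core_data_to_date(ts):
    """Convert a Core Data timestamp (seconds since 2001-01-01) to a YYYY-MM-DD string."""
    return datetime.fromtimestamp(ts + CORE_DATA_EPOCH, tz=timezone.utc).strftime("%Y-%m-%d")

# A Core Data timestamp of 0 is exactly the epoch itself:
print(core_data_to_date(0))  # → 2001-01-01
```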

Critical: Close Quicken Before Writing

The SQLite database is actively used by Quicken when it's open. Writing to it while Quicken is running can cause data corruption or lock conflicts. Always:

  1. Quit Quicken completely
  2. Run the script
  3. Reopen Quicken

The script creates a timestamped backup before every write operation, so you can always recover.


How the Matching Logic Works

The Basic Idea

For each Quicken Amazon transaction that has no memo, we search the Amazon CSV for an order at the same dollar amount near the same date.

Purchases (Debits)

Quicken records a purchase as a negative ZAMOUNT. The script looks for Amazon orders:

  • At the exact same dollar amount (within $0.01 tolerance)
  • Within ±14 days of the Quicken transaction date (first pass)
  • Expanding to ±30 days for MED/LOW certainty matches

The date window accounts for the delay between when Amazon charges your card and when the transaction posts in Quicken.

Returns (Credits)

Returns are positive ZAMOUNT in Quicken. The critical insight here: returns must be matched backwards in time. When Amazon refunds you on September 10, the original purchase might have been in July or August. The script looks for:

  • Amazon orders at the same dollar amount
  • Placed 0 to 120 days before the Quicken credit date

This was one of the biggest bugs we had to fix during development — initially the script searched ±14 days for everything, which meant returns almost never matched because the purchase was weeks or months earlier.
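The two date rules can be sketched as a single predicate (the window constants mirror the script's CONFIG values; the example dates below are invented):

```python
from datetime import datetime

DEBIT_WINDOW = 30    # purchases: symmetric window around the charge date
CREDIT_WINDOW = 120  # returns: look only BACKWARD, up to 120 days

def date_window_ok(tx_date, order_date, is_credit):
    """Mirror of the script's date test: symmetric for debits, backward-only for credits."""
    days = (tx_date - order_date).days
    if is_credit:
        return 0 <= days <= CREDIT_WINDOW  # the order must precede the refund
    return abs(days) <= DEBIT_WINDOW

refund = datetime(2024, 9, 10)
purchase = datetime(2024, 7, 20)  # 52 days earlier
print(date_window_ok(refund, purchase, is_credit=True))   # → True  (within 120-day lookback)
print(date_window_ok(refund, purchase, is_credit=False))  # → False (outside ±30 days)
```

This asymmetry is exactly why the early symmetric version failed on returns: a refund 52 days after purchase is routine for Amazon but far outside any sensible purchase window.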

Multi-Item Orders

Sometimes Amazon charges a single total for multiple items. For example, you buy a $15 book and a $10 charger, and Quicken shows a $25 transaction. The script handles this by:

  1. Grouping all Amazon CSV lines by Order ID
  2. Summing the individual item amounts within each order
  3. Matching the order total against the Quicken transaction
  4. Writing all item names joined with semicolons as the memo
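The steps above can be sketched as follows (a toy two-line order; the Order ID and item names are invented):

```python
from collections import defaultdict

# Hypothetical single-item CSV lines that share one Order ID:
lines = [
    {"order_id": "113-1234567-8901234", "name": "Book", "amount": 15.00},
    {"order_id": "113-1234567-8901234", "name": "Charger", "amount": 10.00},
]

# 1-2: group by Order ID and sum the line amounts
groups = defaultdict(list)
for line in lines:
    groups[line["order_id"]].append(line)
totals = {oid: round(sum(i["amount"] for i in items), 2) for oid, items in groups.items()}

# 4: join the item names with semicolons for the memo
memos = {oid: "; ".join(i["name"] for i in items) for oid, items in groups.items()}

print(totals["113-1234567-8901234"])  # → 25.0  (matches the single $25 Quicken charge)
print(memos["113-1234567-8901234"])   # → Book; Charger
```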

Confidence Tiers

Every match is classified by confidence:

Tier   Criteria                                                       Recommendation
HIGH   Single match within 7 days                                     Safe to auto-apply
MED    Single match within 8–30 days, or clear winner among multiple  Review a sample, then apply
LOW    Multiple different products at same price in time window       Needs human judgment
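A simplified sketch of the tiering for the common single-product case (thresholds as in the script's CONFIG; the full classify_match() additionally handles multi-order ties and clear-winner-by-distance cases):

```python
HIGH_THRESHOLD = 7   # days: max distance for HIGH
MED_THRESHOLD = 30   # days: max distance for MED

def tier(unique_products, best_days):
    """Assign a confidence tier from candidate count and date distance."""
    if unique_products == 1:
        if best_days <= HIGH_THRESHOLD:
            return "HIGH"
        if best_days <= MED_THRESHOLD:
            return "MED"
        return "LOW"
    return "LOW"  # multiple distinct products at the same price: needs review

print(tier(1, 3))   # → HIGH
print(tier(1, 20))  # → MED
print(tier(4, 3))   # → LOW
```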

Split-Level Memos

Quicken transactions can have "splits" — sub-entries that break a transaction into categories. These splits have their own memo field. A transaction might have an empty memo at the top level but detailed product names in its splits (e.g., if someone previously categorized the splits by hand).

The script distinguishes between:

  • Truly empty: No memo at transaction level AND no memos in splits → needs matching
  • Split-documented: Empty transaction memo but splits have memos → already documented, skip

This prevents overstating the number of "missing" memos and avoids unnecessary work.


Using the Script — Step by Step

Step 1: Configure

Open quicken_amazon_matcher.py in any text editor. Near the top, you'll see a CONFIG section. Edit these values:

QUICKEN_DB_PATH = os.path.expanduser(
    "~/Library/Application Support/Quicken/Documents/My Finances.quicken/data"
)

AMAZON_CSV_PATH = os.path.expanduser(
    "~/Downloads/Amazon Order History.csv"
)

AMAZON_PAYEE_NAMES = [
    "Amazon.com",
    "Amazon",
    "AMZN Mktp US",
    # ... add your payee names
]

Step 2: Discover Your Payee Names

If you're not sure what Amazon appears as in your Quicken, run:

python3 quicken_amazon_matcher.py --discover-payees

This searches your Quicken database for any payee containing "amazon" or "amzn" and shows you the exact names and how many transactions each has. Copy these names into the AMAZON_PAYEE_NAMES list.

Step 3: Check Current Coverage

python3 quicken_amazon_matcher.py --coverage

This shows how many Amazon transactions already have memos vs. how many are empty.

Step 4: Scan for Matches (Preview Only)

python3 quicken_amazon_matcher.py --scan

This runs the full matching engine and prints every match with its confidence tier, but writes nothing. Review the output, especially the HIGH matches — these should be essentially perfect.

Step 5: Apply HIGH Confidence Matches

Close Quicken, then:

python3 quicken_amazon_matcher.py --apply HIGH

This writes only HIGH certainty matches and creates a backup first. Reopen Quicken and verify a few transactions look correct.

Step 6: Apply MED Matches (Optional)

If HIGH looked good:

python3 quicken_amazon_matcher.py --apply MED

This applies both HIGH and MED matches. (It's safe to run after Step 5 — the script checks each memo is still empty before writing, so HIGH matches from the previous run won't be touched again.)

Step 7: Review LOW Matches Manually

LOW matches have multiple candidate products at the same price. The --scan output shows you all the candidates. You can:

  • Use Quicken's category tags to disambiguate (e.g., a transaction categorized as "Groceries" is more likely to be food than electronics)
  • Check the Amazon website for the specific order
  • Decide that an approximate memo is better than no memo at all

To apply LOW matches: --apply ALL (but review the scan output first).


Common Issues and Troubleshooting

"ERROR: Quicken database not found"

The path in QUICKEN_DB_PATH is wrong. Use the Finder method described above to find the correct path, or run:

find ~/Library/Application\ Support -name "data" -path "*.quicken/*"

"No orders loaded. Check CSV format"

Your Amazon CSV may have different column names. Open the CSV in a text editor and check the first line (the headers). The script expects exactly: Order Date, Order ID, Product Name, Total Amount. If yours are different, you can either:

  • Rename the columns in the CSV
  • Ask the AI to modify the load_amazon_orders() function
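If you take the second route, one simple approach is a header-alias dictionary applied to each row before the existing parsing runs. The alias names on the left below are hypothetical; substitute whatever headers your CSV actually uses:

```python
# Hypothetical mapping from YOUR CSV's headers to the names the script expects.
COLUMN_ALIASES = {
    "date": "Order Date",
    "order id": "Order ID",
    "title": "Product Name",
    "total": "Total Amount",
}

def normalize_row(row):
    """Rename a raw CSV row's keys to the script's expected column names."""
    return {COLUMN_ALIASES.get(k.strip().lower(), k): v for k, v in row.items()}

raw = {"date": "2024-01-15", "order id": "113-0000000-0000000",
       "title": "Desk Lamp", "total": "24.99"}
print(normalize_row(raw)["Product Name"])  # → Desk Lamp
```

Inside load_amazon_orders(), you would call `row = normalize_row(row)` at the top of the loop and leave the rest of the function unchanged.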

All matches come back as "NO MATCH"

This usually means the Amazon CSV dates aren't being parsed. The script handles several date formats (2024-01-15, 2024-01-15T19:18:06Z, 01/15/2024), but yours might be different. Check a few rows of the CSV and tell the AI what format the dates are in.
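The relevant logic lives in parse_amazon_date(), and supporting another format is a one-entry change to its format list. A standalone sketch, with a hypothetical extra format (`15 Jan 2024`) appended:

```python
from datetime import datetime

# The script's format list, plus one assumed extra format: "%d %b %Y" (e.g. "15 Jan 2024").
FORMATS = ["%Y-%m-%d", "%m/%d/%Y", "%m/%d/%y", "%d/%m/%Y", "%d %b %Y"]

def parse_date(date_str):
    """Try each known format in turn; return None if nothing matches."""
    date_str = date_str.strip()
    if "T" in date_str:          # ISO timestamps: keep only the date part
        date_str = date_str[:10]
    for fmt in FORMATS:
        try:
            return datetime.strptime(date_str, fmt)
        except ValueError:
            continue
    return None

print(parse_date("15 Jan 2024"))  # → 2024-01-15 00:00:00
```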

"Quicken shows old data after running the script"

You may not have quit Quicken before running. Quicken caches data in memory. Quit completely (Cmd+Q, not just close the window) and reopen.

Some transactions match to the wrong product

This happens when two different products cost exactly the same and shipped around the same time. The MED and LOW tiers flag these ambiguous cases. For HIGH matches this is extremely rare, but if you find one, you can manually edit the memo in Quicken.

Returns don't match

If your Amazon order history CSV doesn't go back far enough, returns from older purchases won't find their original order. You may need a more complete CSV export. Also, Amazon sometimes combines multiple items into a single refund at an amount that doesn't correspond to any individual item — these can't be auto-matched.


Extending This Script

Here are things you might want to ask your AI assistant to help with:

  • Add more payee sources: The same matching logic works for any retailer that gives you order history exports (Walmart, Target, etc.). You'd add a new CSV loader and payee name list.

  • Category-based disambiguation: If your Quicken transactions are already categorized (e.g., "Groceries", "Clothing"), the AI can write logic that uses those categories to pick the most likely candidate from LOW matches. For example, if a $15 transaction is categorized as "Groceries" and the two candidates are a protein bar vs. a phone case, the protein bar wins.

  • Batch manual overrides: Create a simple CSV of PK,memo pairs for transactions you've manually identified, and have the script apply them in bulk.

  • Recurring purchases: If you buy the same item regularly (e.g., a subscription snack box at $12.30 every month), the AI can identify the pattern and apply a generic memo to all of them at once.

  • Undo support: The script creates backups, but you could add a --rollback command that restores from the most recent backup.
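As a sketch of the batch-override idea, the snippet below applies a PK,memo CSV against an in-memory stand-in for the ZTRANSACTION table. The real version would point at your Quicken database path and quit Quicken first; all PKs and memos here are invented:

```python
import csv
import io
import sqlite3

# Hypothetical overrides file: one PK,memo pair per line.
overrides_csv = "pk,memo\n42,Garden hose 50ft\n43,Return: Garden hose 50ft\n"

# In-memory stand-in mirroring the ZTRANSACTION columns documented above.
conn = sqlite3.connect(":memory:")
c = conn.cursor()
c.execute("CREATE TABLE ZTRANSACTION (Z_PK INTEGER PRIMARY KEY, ZNOTE TEXT, Z_OPT INTEGER)")
c.executemany("INSERT INTO ZTRANSACTION VALUES (?, NULL, 1)", [(42,), (43,)])

for row in csv.DictReader(io.StringIO(overrides_csv)):
    # Same safe pattern as the script: write ZNOTE and bump Z_OPT together.
    c.execute("UPDATE ZTRANSACTION SET ZNOTE = ?, Z_OPT = Z_OPT + 1 WHERE Z_PK = ?",
              (row["memo"], int(row["pk"])))
conn.commit()

print(c.execute("SELECT ZNOTE, Z_OPT FROM ZTRANSACTION WHERE Z_PK = 42").fetchone())
# → ('Garden hose 50ft', 2)
```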


Database Reference (Quick Cheat Sheet)

Find all Amazon payees:

SELECT Z_PK, ZNAME FROM ZUSERPAYEE
WHERE ZNAME LIKE '%amazon%' OR ZNAME LIKE '%amzn%';

Count empty-memo Amazon transactions:

SELECT COUNT(*) FROM ZTRANSACTION
WHERE ZUSERPAYEE IN (your_payee_pks)
  AND (ZNOTE IS NULL OR ZNOTE = '');

See a transaction with its splits:

SELECT t.Z_PK, date(t.ZENTEREDDATE + 978307200, 'unixepoch') as dt,
       t.ZAMOUNT, t.ZNOTE,
       cfte.ZAMOUNT as split_amt, cfte.ZNOTE as split_memo, tag.ZNAME as category
FROM ZTRANSACTION t
LEFT JOIN ZCASHFLOWTRANSACTIONENTRY cfte ON cfte.ZPARENT = t.Z_PK
LEFT JOIN ZTAG tag ON tag.Z_PK = cfte.ZCATEGORYTAG
WHERE t.Z_PK = 12345;

Safe memo update (always use this pattern):

UPDATE ZTRANSACTION SET ZNOTE = 'Your Memo Here', Z_OPT = Z_OPT + 1 WHERE Z_PK = 12345;

Check coverage:

SELECT
  COUNT(*) as total,
  SUM(CASE WHEN ZNOTE IS NOT NULL AND ZNOTE != '' THEN 1 ELSE 0 END) as with_memo,
  SUM(CASE WHEN ZNOTE IS NULL OR ZNOTE = '' THEN 1 ELSE 0 END) as empty
FROM ZTRANSACTION
WHERE ZUSERPAYEE IN (your_payee_pks);

Project History

This script was developed over multiple sessions working with Claude (Anthropic) in Quicken's Cowork mode. The process involved:

  1. Reverse-engineering the Quicken Core Data schema
  2. Building the matching engine iteratively — starting with exact date+amount, then adding multi-item order support, backward-looking return matching, and confidence tiering
  3. Applying matches in waves: HIGH first (safest), then MED, then manually-reviewed LOW
  4. Discovering the split-memo distinction (transactions with empty top-level memo but documented splits)
  5. Using Quicken category tags to disambiguate LOW-confidence matches

The approach went from ~5% memo coverage to 98%+ across 4,150 Amazon transactions spanning 2002–2026.

@jcurren00

Thank you. This solves a big problem
