|
#!/usr/bin/env python3 |
|
from __future__ import annotations |
|
|
|
import argparse |
|
import os |
|
import sys |
|
import time |
|
import subprocess |
|
from pathlib import Path |
|
from typing import Iterable, Set, List, Optional |
|
|
|
|
|
# Directory names that the repository scan does not descend into by default
# (dependency trees, build output, tool caches and similar heavy directories).
DEFAULT_SKIP_DIRS = {
    ".cache",
    ".git",
    ".gradle",
    ".idea",
    ".mypy_cache",
    ".next",
    ".nuxt",
    ".pytest_cache",
    ".terraform",
    ".tox",
    ".venv",
    "Carthage",
    "DerivedData",
    "Library",
    "Pods",
    "build",
    "dist",
    "node_modules",
    "target",
}
|
|
|
|
|
def eprint(*args, **kwargs):
    """Print to standard error; takes the same arguments as ``print`` except ``file``."""
    stream = sys.stderr
    print(*args, file=stream, **kwargs)
|
|
|
|
|
def run(cmd: List[str], *, cwd: Optional[Path] = None, timeout: Optional[int] = None) -> subprocess.CompletedProcess[str]:
    """Execute ``cmd`` and return the finished process with captured text output.

    Args:
        cmd: Program and arguments, passed to ``subprocess.run`` as a list.
        cwd: Working directory for the child process; ``None`` keeps the current one.
        timeout: Seconds to wait before ``subprocess.TimeoutExpired`` is raised;
            ``None`` waits indefinitely.

    Returns:
        The ``CompletedProcess`` with ``stdout``/``stderr`` decoded as text.
        A non-zero exit status is not raised; callers inspect ``returncode``.
    """
    options = {
        "cwd": None if not cwd else str(cwd),
        "text": True,
        "capture_output": True,
        "check": False,
        "timeout": timeout,
    }
    return subprocess.run(cmd, **options)
|
|
|
|
|
def depth_from(root: Path, current: Path) -> int:
    """Return how many path components ``current`` lies below ``root``.

    Args:
        root: The directory depths are measured from (depth 0).
        current: The path whose depth is wanted.

    Returns:
        The number of components in ``current`` relative to ``root``, or a very
        large sentinel (``10**9``) when ``current`` is not located under ``root``.
    """
    try:
        relative = current.relative_to(root)
    except ValueError:
        # relative_to() raises ValueError when current is not under root;
        # the sentinel is larger than any realistic depth limit.
        return 10**9
    return len(relative.parts)
|
|
|
|
|
def find_git_repos(
    roots: Iterable[Path],
    *,
    max_depth: int,
    skip_dirs: Set[str],
    progress_every: int = 2,
) -> Set[Path]:
    """Walk each root and collect the directories that contain a ``.git`` entry.

    Progress and results are reported on stderr while scanning.

    Args:
        roots: Directories to scan. Roots that do not exist are reported and skipped.
        max_depth: Deepest level below a root (the root itself is depth 0) at
            which a directory is still inspected.
        skip_dirs: Directory names that are not descended into.
        progress_every: Minimum number of seconds between progress lines.

    Returns:
        The resolved paths of all directories that hold a ``.git`` directory or file.
        The scan does not descend into a directory once it is identified as a repo.
    """
    repos: Set[Path] = set()
    # Monotonic clock: elapsed-time arithmetic must not be affected by
    # wall-clock adjustments during a long scan.
    start = time.monotonic()
    last = start
    walked = 0

    for root in roots:
        root = root.resolve()
        if not root.exists():
            eprint(f"[scan] skip missing root: {root}")
            continue

        eprint(f"[scan] scanning: {root} (max_depth={max_depth})")

        for dirpath, dirnames, filenames in os.walk(root):
            walked += 1
            p = Path(dirpath)

            d = depth_from(root, p)
            if d > max_depth:
                # Defensive guard: reached when max_depth is negative or when
                # depth_from() returns its "not under root" sentinel.
                dirnames[:] = []
                continue

            # progress heartbeat
            now = time.monotonic()
            if now - last >= progress_every:
                eprint(f"[scan] ... walked {walked} dirs, found {len(repos)} repos so far (at: {p})")
                last = now

            # detect repo
            if ".git" in dirnames or ".git" in filenames:
                repos.add(p.resolve())
                eprint(f"[scan] found repo: {p}")
                dirnames[:] = []  # don't descend further into the repo
                continue

            if d >= max_depth:
                # Children would exceed max_depth; prune here so os.walk never
                # has to list them just to have them discarded.
                dirnames[:] = []
                continue

            # prune heavy dirs
            if skip_dirs:
                dirnames[:] = [dname for dname in dirnames if dname not in skip_dirs]

    eprint(f"[scan] done. walked {walked} dirs, found {len(repos)} repo(s) in {time.monotonic() - start:.1f}s")
    return repos
|
|
|
|
|
def repo_toplevel(repo_hint: Path, *, timeout: Optional[int]) -> Optional[Path]:
    """Ask git for the top-level working-tree directory containing ``repo_hint``.

    Args:
        repo_hint: A directory inside (or at the root of) a git working tree.
        timeout: Seconds to allow the git call; ``None`` waits indefinitely.

    Returns:
        The resolved top-level path, or ``None`` when git times out, exits with
        a non-zero status, or prints no path. Each of these is reported on stderr.
    """
    try:
        r = run(["git", "rev-parse", "--show-toplevel"], cwd=repo_hint, timeout=timeout)
    except subprocess.TimeoutExpired:
        eprint(f"[git] timeout rev-parse in {repo_hint}")
        return None

    if r.returncode != 0:
        # Surface git's own explanation instead of dropping the repo silently.
        detail = (r.stderr or "").strip()
        suffix = f": {detail}" if detail else ""
        eprint(f"[git] rev-parse failed in {repo_hint}{suffix}")
        return None

    top = r.stdout.strip()
    if not top:
        # Path("") would resolve to the current working directory, which is
        # not necessarily the repository that was asked about.
        eprint(f"[git] rev-parse returned no path in {repo_hint}")
        return None
    return Path(top).resolve()
|
|
|
|
|
def git_ignored_dirs(repo_root: Path, *, timeout: Optional[int]) -> Set[Path]:
    """Return the directories inside ``repo_root`` that git treats as ignored.

    Uses Git's own ignore engine: ``--directory`` asks Git to report a wholly
    ignored directory as a single entry with a trailing slash, and ``-z``
    makes the output NUL-separated with unquoted names, so directory names
    containing spaces, quotes or non-ASCII characters are read back exactly.

    Args:
        repo_root: Top-level directory of the git working tree.
        timeout: Seconds to allow the git call; ``None`` waits indefinitely.

    Returns:
        Resolved paths of the ignored directories that exist on disk. The set
        is empty when git times out or exits with a non-zero status.
    """
    cmd = ["git", "ls-files", "-z", "-oi", "--exclude-standard", "--directory"]
    try:
        r = run(cmd, cwd=repo_root, timeout=timeout)
    except subprocess.TimeoutExpired:
        eprint(f"[git] timeout ls-files in {repo_root}")
        return set()

    if r.returncode != 0:
        detail = (r.stderr or "").strip()
        suffix = f": {detail}" if detail else ""
        eprint(f"[git] ls-files failed in {repo_root}{suffix}")
        return set()

    ignored: Set[Path] = set()
    for entry in r.stdout.split("\0"):
        # Only entries with a trailing slash are directories. Everything else
        # is an ignored file, which includes symlinks; following a symlink
        # here could return a directory outside the repository.
        if not entry.endswith("/"):
            continue
        p = (repo_root / entry.rstrip("/")).resolve()
        if p.is_dir():
            ignored.add(p)
    return ignored
|
|
|
|
|
def tm_is_excluded(path: Path, *, timeout: Optional[int]) -> bool:
    """Report whether ``tmutil isexcluded`` marks ``path`` as excluded.

    Args:
        path: The path to query.
        timeout: Seconds to allow the tmutil call; ``None`` waits indefinitely.

    Returns:
        ``True`` when a line of tmutil's output starts with ``[Excluded]``.
        ``False`` otherwise, including when the call times out.
    """
    try:
        r = run(["tmutil", "isexcluded", str(path)], timeout=timeout)
    except subprocess.TimeoutExpired:
        eprint(f"[tm] timeout isexcluded {path}")
        return False

    # tmutil echoes the queried path after the status tag, so the tag is only
    # matched at the start of a line; a plain substring test would misfire
    # for a path that itself contains "[Excluded]".
    for stream in (r.stdout, r.stderr):
        for line in (stream or "").splitlines():
            if line.lstrip().startswith("[Excluded]"):
                return True
    return False
|
|
|
|
|
def tm_add_exclusion_fixed(path: Path, *, verbose: bool):
    """Add a fixed-path Time Machine exclusion for ``path`` via ``sudo tmutil``.

    Args:
        path: The path to exclude.
        verbose: When true, echo the command line to stderr before running it.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    argv = ["sudo", "tmutil", "addexclusion", "-p"]
    argv.append(str(path))
    if verbose:
        eprint("[tm] RUN:", " ".join(argv))
    subprocess.check_call(argv)
|
|
|
|
|
def main() -> int:
    """Command-line entry point.

    Scans the given roots for git repositories, collects the directories git
    ignores in each, checks which of them Time Machine does not already
    exclude, and either lists them (dry run, the default) or adds a fixed-path
    exclusion for each one (``--apply``).

    Returns:
        Process exit status: 0 on success or when there is nothing to do,
        1 when at least one exclusion could not be added.
    """
    ap = argparse.ArgumentParser(description="Exclude Git-ignored directories from Time Machine backups (with progress output).")
    ap.add_argument("roots", nargs="*", default=["."], help="Roots to scan (default: current directory).")
    ap.add_argument("--apply", action="store_true", help="Apply changes (default: dry-run).")
    ap.add_argument("--verbose", action="store_true")
    ap.add_argument("--max-depth", type=int, default=6, help="Max directory depth to scan under each root (default: 6).")
    ap.add_argument("--timeout", type=int, default=20, help="Timeout seconds for each git/tmutil call (default: 20).")
    ap.add_argument("--no-default-skip", action="store_true", help="Don't skip common heavy dirs during scan.")
    args = ap.parse_args()

    roots = [Path(r).expanduser() for r in args.roots]
    skip_dirs = set() if args.no_default_skip else set(DEFAULT_SKIP_DIRS)

    repo_hints = find_git_repos(roots, max_depth=args.max_depth, skip_dirs=skip_dirs)

    # normalize to top-level (several hints may belong to the same repository)
    repos: Set[Path] = set()
    for hint in repo_hints:
        top = repo_toplevel(hint, timeout=args.timeout)
        if top:
            repos.add(top)

    eprint(f"[git] unique repos: {len(repos)}")
    if not repos:
        print("No git repos found.")
        return 0

    all_dirs: Set[Path] = set()
    for i, repo in enumerate(sorted(repos), 1):
        eprint(f"[git] ({i}/{len(repos)}) collecting ignored dirs in: {repo}")
        dirs = git_ignored_dirs(repo, timeout=args.timeout)
        eprint(f"[git] ({i}/{len(repos)}) -> {len(dirs)} ignored dir(s)")
        all_dirs |= dirs

    eprint(f"[plan] total ignored dirs (deduped): {len(all_dirs)}")
    if not all_dirs:
        print("No ignored directories found.")
        return 0

    # Keep only the directories Time Machine does not already exclude.
    to_add: List[Path] = []
    for checked, d in enumerate(sorted(all_dirs), 1):
        if checked % 200 == 0:
            eprint(f"[tm] checked {checked}/{len(all_dirs)} paths...")
        if not tm_is_excluded(d, timeout=args.timeout):
            to_add.append(d)

    eprint(f"[plan] need to add {len(to_add)} exclusions (already excluded: {len(all_dirs) - len(to_add)})")

    # Checked before the dry-run branch so a dry run never prints an empty list.
    if not to_add:
        print("Nothing to add.")
        return 0

    if not args.apply:
        print("Dry run. Would add fixed-path Time Machine exclusions for:")
        for d in to_add:
            print("  ", d)
        print("\nRe-run with --apply to actually add them.")
        return 0

    # Apply. A failure on one path is recorded and the remaining paths are
    # still processed, instead of aborting the whole run with a traceback.
    failed: List[Path] = []
    for i, d in enumerate(to_add, 1):
        eprint(f"[tm] ({i}/{len(to_add)}) excluding: {d}")
        try:
            tm_add_exclusion_fixed(d, verbose=args.verbose)
        except subprocess.CalledProcessError as exc:
            eprint(f"[tm] failed to exclude {d} (exit status {exc.returncode})")
            failed.append(d)
            continue
        print("Excluded:", d)

    if failed:
        eprint(f"[tm] {len(failed)} of {len(to_add)} exclusion(s) could not be added")
        return 1

    print("Done.")
    return 0
|
|
|
|
|
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())