Skip to content

Instantly share code, notes, and snippets.

@alexandrusavin
Created December 14, 2025 08:43
Show Gist options
  • Select an option

  • Save alexandrusavin/052538b7fe3505a2f58fa4f4b4b8858f to your computer and use it in GitHub Desktop.

Select an option

Save alexandrusavin/052538b7fe3505a2f58fa4f4b4b8858f to your computer and use it in GitHub Desktop.

Exclude Git-Ignored Directories from Time Machine

Automatically find Git repositories, ask Git which directories are ignored, and add those directories to the macOS Time Machine exclusion list.

No .gitignore parsing. Git is the source of truth.


Why

Time Machine happily backs up things you rarely want restored:

  • node_modules/
  • dist/, build/, target/
  • .venv/, .tox/
  • caches and generated artifacts

Git already knows what’s disposable. This script reuses that knowledge to keep backups smaller and faster.


What it does

  • Recursively scans the given roots for Git repositories (down to --max-depth levels; default 6)

  • Uses Git’s ignore engine:

    git ls-files -oi --exclude-standard --directory

  • Collects ignored directories (not files)

  • De-duplicates paths across repos

  • Adds fixed-path exclusions:

    tmutil addexclusion -p

  • Prints progress so it doesn’t look like it’s hanging

  • Dry-run by default


Requirements

  • macOS
  • Python 3.9+
  • Git
  • Time Machine enabled
  • Full Disk Access granted to your Terminal / Python interpreter
  • sudo privileges (needed for persistent exclusions)

Usage

Dry run (recommended):

python3 tm_exclude_gitignored.py ~/code

Apply exclusions:

python3 tm_exclude_gitignored.py ~/code --apply

Scan multiple roots:

python3 tm_exclude_gitignored.py ~/code ~/work

Tune scanning:

python3 tm_exclude_gitignored.py ~/code --max-depth 10 --timeout 30

Output behavior

  • stderr: progress and diagnostics
  • stdout: actual results

Examples:

# Hide progress, keep results
python3 tm_exclude_gitignored.py 2>/dev/null

# Save results, keep progress visible
python3 tm_exclude_gitignored.py > exclusions.txt

Safety decisions

  • Only directories are excluded
  • Git semantics are respected (.gitignore, .git/info/exclude, global excludes)
  • Existing Time Machine exclusions are detected and skipped
  • Common heavy directories are skipped during scanning for performance

Limitations

  • Only affects paths inside Git repositories
  • Submodules and repositories nested inside another repository aren’t scanned (the scan stops descending once it finds a repository)
  • No backup size estimation
  • Time Machine exclusions are global, not per-project

Why not parse .gitignore?

Because it’s fragile and easy to get wrong.

Git ignore rules include nested scopes, negations, globs, **, and repo-root anchoring. Git already computes the final answer — this script just consumes it.


One-liner

If Git ignores it, Time Machine should too.

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import os
import sys
import time
import subprocess
from pathlib import Path
from typing import Iterable, Set, List, Optional
# Directory names pruned while scanning for repositories (unless
# --no-default-skip is given). Matching is by bare directory name,
# at any depth below a scan root.
DEFAULT_SKIP_DIRS = {
    ".cache",
    ".git",
    ".gradle",
    ".idea",
    ".mypy_cache",
    ".next",
    ".nuxt",
    ".pytest_cache",
    ".terraform",
    ".tox",
    ".venv",
    "Carthage",
    "DerivedData",
    "Library",
    "Pods",
    "build",
    "dist",
    "node_modules",
    "target",
}
def eprint(*args, **kwargs):
    """Write a message to standard error.

    Takes the same positional and keyword arguments as the built-in
    ``print``. The destination is always ``sys.stderr``, so passing
    ``file=`` raises ``TypeError``.
    """
    print(*args, **kwargs, file=sys.stderr)
def run(
    cmd: List[str],
    *,
    cwd: Optional[Path] = None,
    timeout: Optional[int] = None,
) -> subprocess.CompletedProcess[str]:
    """Run *cmd* without a shell and capture its output as text.

    Args:
        cmd: Program name followed by its arguments.
        cwd: Working directory for the child process; when omitted the
            child inherits the current directory.
        timeout: Seconds to wait before ``subprocess.TimeoutExpired`` is
            raised; ``None`` waits indefinitely.

    Returns:
        The completed process, with ``stdout`` and ``stderr`` as strings.
        A non-zero exit status does not raise (``check=False``); callers
        inspect ``returncode`` themselves.
    """
    workdir = None if not cwd else str(cwd)
    return subprocess.run(
        cmd,
        cwd=workdir,
        timeout=timeout,
        check=False,
        capture_output=True,
        text=True,
    )
def depth_from(root: Path, current: Path) -> int:
    """Return how many path components *current* lies below *root*.

    Args:
        root: Directory the depth is measured from.
        current: Path whose depth is wanted.

    Returns:
        ``0`` when both paths are equal, ``1`` for a direct child, and so
        on. When *current* is not located under *root* a very large
        sentinel (``10**9``) is returned, so any "depth > limit" comparison
        treats the path as too deep.
    """
    try:
        relative = current.relative_to(root)
    except ValueError:
        # relative_to() signals "not a sub-path of root" with ValueError.
        return 10**9
    return len(relative.parts)
def find_git_repos(
    roots: Iterable[Path],
    *,
    max_depth: int,
    skip_dirs: Set[str],
    progress_every: int = 2,
) -> Set[Path]:
    """Walk each root and collect directories that contain a ``.git`` entry.

    Args:
        roots: Directories to scan; each is resolved first and missing ones
            are reported and skipped.
        max_depth: Deepest level below a root (root itself is depth 0) at
            which a directory is still examined.
        skip_dirs: Directory names that are never descended into. An empty
            set disables this pruning.
        progress_every: Minimum number of seconds between progress lines.

    Returns:
        Resolved paths of every directory holding a ``.git`` directory or
        file. The walk does not descend into a directory once it has been
        identified as a repository.

    Progress and diagnostics are written to stderr via ``eprint``.
    """
    repos: Set[Path] = set()
    # Monotonic clock: elapsed-time arithmetic must not be affected by
    # wall-clock adjustments.
    start = time.monotonic()
    last = start
    walked = 0

    for root in roots:
        root = root.resolve()
        if not root.exists():
            eprint(f"[scan] skip missing root: {root}")
            continue

        eprint(f"[scan] scanning: {root} (max_depth={max_depth})")
        for dirpath, dirnames, filenames in os.walk(root):
            walked += 1
            p = Path(dirpath)
            d = depth_from(root, p)
            if d > max_depth:
                # Only reachable for the root itself (negative max_depth),
                # because deeper levels are pruned below before being visited.
                dirnames[:] = []
                continue

            # progress heartbeat
            now = time.monotonic()
            if now - last >= progress_every:
                eprint(f"[scan] ... walked {walked} dirs, found {len(repos)} repos so far (at: {p})")
                last = now

            # detect repo (".git" may be a directory or a file)
            if ".git" in dirnames or ".git" in filenames:
                repos.add(p.resolve())
                eprint(f"[scan] found repo: {p}")
                dirnames[:] = []  # don't descend further into the repo
                continue

            if d >= max_depth:
                # Children would exceed max_depth and be discarded anyway;
                # prune now instead of visiting a whole extra level.
                dirnames[:] = []
            elif skip_dirs:
                # prune heavy dirs
                dirnames[:] = [dname for dname in dirnames if dname not in skip_dirs]

    eprint(f"[scan] done. walked {walked} dirs, found {len(repos)} repo(s) in {time.monotonic() - start:.1f}s")
    return repos
def repo_toplevel(repo_hint: Path, *, timeout: Optional[int]) -> Optional[Path]:
    """Ask Git for the top-level working-tree directory containing *repo_hint*.

    Args:
        repo_hint: Directory in which ``git rev-parse --show-toplevel`` is run.
        timeout: Seconds allowed for the Git call; ``None`` means no limit.

    Returns:
        The resolved top-level path, or ``None`` when Git times out, exits
        non-zero, or prints no path. Each of those cases is reported on
        stderr.
    """
    try:
        r = run(["git", "rev-parse", "--show-toplevel"], cwd=repo_hint, timeout=timeout)
    except subprocess.TimeoutExpired:
        eprint(f"[git] timeout rev-parse in {repo_hint}")
        return None

    if r.returncode != 0:
        reason = (r.stderr or "").strip() or f"exit status {r.returncode}"
        eprint(f"[git] rev-parse failed in {repo_hint}: {reason}")
        return None

    # Remove only the line terminator: strip() would also eat leading or
    # trailing whitespace that legitimately belongs to the directory name.
    top = (r.stdout or "").rstrip("\n")
    if not top:
        # Path("") is the current directory, so an empty answer must not be
        # turned into a path.
        eprint(f"[git] rev-parse returned no path in {repo_hint}")
        return None
    return Path(top).resolve()
def git_ignored_dirs(repo_root: Path, *, timeout: Optional[int]) -> Set[Path]:
    """Return the ignored directories Git reports for the repository at *repo_root*.

    Uses Git's own ignore engine (``git ls-files -oi --exclude-standard
    --directory``) rather than parsing ignore files.

    Args:
        repo_root: Repository top-level directory; Git is run from here and
            the reported relative paths are joined onto it.
        timeout: Seconds allowed for the Git call; ``None`` means no limit.

    Returns:
        Absolute paths of ignored entries that are real directories.
        Ignored files and symlinks are left out. An empty set is returned
        when Git times out or fails; both cases are reported on stderr.
    """
    # -z: NUL-terminated records with path names emitted verbatim. Without
    # it Git may quote/escape unusual names, which would then not match any
    # path on disk.
    cmd = ["git", "ls-files", "-z", "-oi", "--exclude-standard", "--directory"]
    try:
        r = run(cmd, cwd=repo_root, timeout=timeout)
    except subprocess.TimeoutExpired:
        eprint(f"[git] timeout ls-files in {repo_root}")
        return set()

    if r.returncode != 0:
        reason = (r.stderr or "").strip() or f"exit status {r.returncode}"
        eprint(f"[git] ls-files failed in {repo_root}: {reason}")
        return set()

    base = repo_root.resolve()
    ignored: Set[Path] = set()
    for entry in (r.stdout or "").split("\0"):
        # With --directory, directories carry a trailing slash; anything
        # else (including the empty record after the last NUL) is skipped.
        if not entry.endswith("/"):
            continue
        p = base / entry.rstrip("/")
        # Do not follow symlinks: resolving one could put a directory
        # outside the repository on the exclusion list.
        if p.is_dir() and not p.is_symlink():
            ignored.add(p)
    return ignored
def tm_is_excluded(path: Path, *, timeout: Optional[int]) -> bool:
    """Report whether ``tmutil isexcluded`` marks *path* as excluded.

    Args:
        path: Path to query.
        timeout: Seconds allowed for the ``tmutil`` call; ``None`` means
            no limit.

    Returns:
        ``True`` when the text ``[Excluded]`` appears in the command's
        combined stdout and stderr. A timed-out query is reported on stderr
        and counts as not excluded.
    """
    query = ["tmutil", "isexcluded", str(path)]
    try:
        result = run(query, timeout=timeout)
    except subprocess.TimeoutExpired:
        eprint(f"[tm] timeout isexcluded {path}")
        return False

    # Search both streams, stdout first, treating a missing stream as empty.
    combined = "".join(stream or "" for stream in (result.stdout, result.stderr))
    return "[Excluded]" in combined
def tm_add_exclusion_fixed(path: Path, *, verbose: bool) -> None:
    """Add a fixed-path Time Machine exclusion for *path* via ``sudo tmutil``.

    Args:
        path: Path to exclude.
        verbose: When true, log the exact command to stderr before running it.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    import shlex  # local import: only needed to render the log line

    cmd = ["sudo", "tmutil", "addexclusion", "-p", str(path)]
    if verbose:
        # shlex.join quotes arguments, so paths containing spaces or shell
        # metacharacters are shown unambiguously and can be copy-pasted.
        eprint("[tm] RUN:", shlex.join(cmd))
    subprocess.check_call(cmd)
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point.

    Scans the given roots for Git repositories, collects the directories
    Git ignores, and lists them (dry run, the default) or adds them as
    fixed-path Time Machine exclusions (``--apply``).

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        Process exit status: ``0`` on success (including "nothing to do"),
        ``1`` when adding an exclusion fails.

    Results go to stdout; progress and diagnostics go to stderr.
    """
    ap = argparse.ArgumentParser(description="Exclude Git-ignored directories from Time Machine backups (with progress output).")
    ap.add_argument("roots", nargs="*", default=["."], help="Roots to scan (default: current directory).")
    ap.add_argument("--apply", action="store_true", help="Apply changes (default: dry-run).")
    ap.add_argument("--verbose", action="store_true")
    ap.add_argument("--max-depth", type=int, default=6, help="Max directory depth to scan under each root (default: 6).")
    ap.add_argument("--timeout", type=int, default=20, help="Timeout seconds for each git/tmutil call (default: 20).")
    ap.add_argument("--no-default-skip", action="store_true", help="Don't skip common heavy dirs during scan.")
    args = ap.parse_args(argv)

    roots = [Path(r).expanduser() for r in args.roots]
    skip_dirs = set() if args.no_default_skip else set(DEFAULT_SKIP_DIRS)
    repo_hints = find_git_repos(roots, max_depth=args.max_depth, skip_dirs=skip_dirs)

    # normalize to top-level
    repos: Set[Path] = set()
    for hint in repo_hints:
        top = repo_toplevel(hint, timeout=args.timeout)
        if top:
            repos.add(top)
    eprint(f"[git] unique repos: {len(repos)}")
    if not repos:
        print("No git repos found.")
        return 0

    all_dirs: Set[Path] = set()
    for i, repo in enumerate(sorted(repos), 1):
        eprint(f"[git] ({i}/{len(repos)}) collecting ignored dirs in: {repo}")
        dirs = git_ignored_dirs(repo, timeout=args.timeout)
        eprint(f"[git] ({i}/{len(repos)}) -> {len(dirs)} ignored dir(s)")
        all_dirs |= dirs
    eprint(f"[plan] total ignored dirs (deduped): {len(all_dirs)}")
    if not all_dirs:
        print("No ignored directories found.")
        return 0

    # Keep only the paths Time Machine does not already exclude.
    to_add: List[Path] = []
    for checked, d in enumerate(sorted(all_dirs), 1):
        if checked % 200 == 0:
            eprint(f"[tm] checked {checked}/{len(all_dirs)} paths...")
        if not tm_is_excluded(d, timeout=args.timeout):
            to_add.append(d)
    eprint(f"[plan] need to add {len(to_add)} exclusions (already excluded: {len(all_dirs) - len(to_add)})")

    # Checked before the dry-run branch so a dry run never prints an empty
    # "would add" list followed by a hint to re-run.
    if not to_add:
        print("Nothing to add.")
        return 0

    if not args.apply:
        print("Dry run. Would add fixed-path Time Machine exclusions for:")
        for d in to_add:
            print(" ", d)
        print("\nRe-run with --apply to actually add them.")
        return 0

    # Apply
    for i, d in enumerate(to_add, 1):
        eprint(f"[tm] ({i}/{len(to_add)}) excluding: {d}")
        try:
            tm_add_exclusion_fixed(d, verbose=args.verbose)
        except subprocess.CalledProcessError as exc:
            # Stop at the first failure with a readable message instead of
            # a traceback; paths handled so far were already reported.
            eprint(f"[tm] failed to exclude {d} (exit status {exc.returncode}); added {i - 1} of {len(to_add)}")
            return 1
        print("Excluded:", d)
    print("Done.")
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment