Last active
February 8, 2026 18:35
-
-
Save oscmansan/c21052f4331861f7208adb72ecaac08a to your computer and use it in GitHub Desktop.
Fix PDF font embedding issues in thesis figures for submission to Papyrus @ UdeM. Requires pdffonts (poppler) and gs (ghostscript).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """Check and fix PDF font embedding issues in thesis figures.""" | |
| import argparse | |
| import shutil | |
| import subprocess | |
| from pathlib import Path | |
def check_pdf_fonts(pdf_path: str) -> list[str]:
    """Check a PDF for font issues by parsing the output of ``pdffonts``.

    Args:
        pdf_path: Path of the PDF file handed to ``pdffonts``.

    Returns:
        One problem description per offending font row, in output order
        (the same description can appear several times when several fonts
        share a problem). An empty list means no issue was detected. When
        ``pdffonts`` cannot be run, times out, or exits with a non-zero
        status, a single-element list ``["error: <details>"]`` is returned.
    """
    try:
        result = subprocess.run(
            ["pdffonts", pdf_path],
            capture_output=True, text=True, timeout=30,
        )
    except Exception as e:  # e.g. missing binary or timeout: report, don't crash
        return [f"error: {e}"]
    if result.returncode != 0:
        return [f"error: {result.stderr.strip()}"]

    problems = []
    for line in result.stdout.split("\n")[2:]:  # Skip the two header lines
        parts = line.split()
        if not parts:
            continue
        if "Type 3" in line:
            problems.append("Type 3 (bitmap)")
        if parts[0] == "[none]":
            problems.append("unnamed font")
        # Row layout: name type encoding emb sub uni object-ID.
        # The font type may contain spaces ("Type 1", "CID TrueType"), so the
        # flags are located from the right-hand side. The object ID normally
        # takes two tokens ("<num> <gen>"); a lone "[none]" token is also
        # accepted in case a font has no object ID.
        id_tokens = 1 if parts[-1] == "[none]" else 2
        flags = parts[-(id_tokens + 3):-id_tokens]  # [emb, sub, uni]
        flags_valid = len(flags) == 3 and all(f in ("yes", "no") for f in flags)
        if flags_valid and flags[0] == "no":
            problems.append("not embedded")
    return problems
def fix_pdf(input_path: Path, output_path: Path) -> bool:
    """Rewrite a PDF with Ghostscript so that fonts are converted to outlines.

    The parent directory of ``output_path`` is created when missing.

    Args:
        input_path: PDF to read.
        output_path: Where Ghostscript writes the converted PDF.

    Returns:
        True when ``gs`` exits with status 0; False when it exits with any
        other status, cannot be started, or exceeds the 60 second timeout.
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    command = [
        "gs",
        "-dNOPAUSE",
        "-dBATCH",
        "-sDEVICE=pdfwrite",
        "-dNoOutputFonts",
        f"-sOutputFile={output_path}",
        str(input_path),
    ]
    try:
        completed = subprocess.run(command, capture_output=True, timeout=60)
    except Exception:
        # Any failure to run Ghostscript is reported as "not fixed".
        return False
    return completed.returncode == 0
def main():
    """Command-line entry point: report font problems and optionally fix them.

    Options:
        --fix     Copy the source tree to the destination and replace every
                  problematic PDF in that copy with a converted version.
        --source  Directory to scan (default ``PhD_Thesis``).
        --dest    Destination of the fixed copy (default ``<source>_fixed``).

    Every ``*.pdf`` found below a ``figures`` directory inside the source is
    checked with ``check_pdf_fonts``; a per-file report and a summary are
    printed. With ``--fix``, an existing destination is deleted and recreated
    from the source, so the destination must not be the source directory
    itself or one of its ancestors; such a request is rejected up front.
    """
    parser = argparse.ArgumentParser(description="Check/fix PDF font embedding issues")
    parser.add_argument("--fix", action="store_true", help="Create fixed copy")
    parser.add_argument("--source", default="PhD_Thesis", help="Source directory")
    parser.add_argument("--dest", default=None, help="Destination (default: <source>_fixed)")
    args = parser.parse_args()

    source = Path(args.source)
    dest = Path(args.dest or f"{args.source}_fixed")

    if not source.exists():
        print(f"Error: '{source}' not found")
        return

    if args.fix:
        # The fix step removes an existing destination before copying. If the
        # destination is the source (or contains it), that would delete the
        # very files we are about to copy, so refuse before doing any work.
        source_real = source.resolve()
        dest_real = dest.resolve()
        if dest_real == source_real or dest_real in source_real.parents:
            print(f"Error: destination '{dest}' would overwrite source '{source}'")
            return

    # Find all PDFs in figures folders
    pdfs = list(source.glob("**/figures/**/*.pdf")) + list(source.glob("**/figures/*.pdf"))
    pdfs = sorted(set(pdfs))
    print(f"Checking {len(pdfs)} PDFs in {source}/\n")

    # Check each PDF
    problematic = {}
    for pdf in pdfs:
        issues = check_pdf_fonts(str(pdf))
        if issues:
            problematic[pdf] = issues
            print(f"❌ {pdf.relative_to(source)}")
            # Deduplicate and sort so the report is stable between runs.
            for issue in sorted(set(issues)):
                print(f" └── {issue}")
        else:
            print(f"✅ {pdf.relative_to(source)}")

    # Summary
    print(f"\n{'='*50}")
    print(f"Clean: {len(pdfs) - len(problematic)} | Problematic: {len(problematic)}")
    if not args.fix or not problematic:
        if problematic and not args.fix:
            print(f"\nRun with --fix to create fixed copy at {dest}/")
        return

    # Create fixed copy
    print(f"\nCreating fixed copy at {dest}/")
    if dest.exists():
        shutil.rmtree(dest)
    shutil.copytree(source, dest)

    # Fix problematic PDFs
    fixed, failed = 0, 0
    for pdf in problematic:
        relative = pdf.relative_to(source)
        dest_pdf = dest / relative
        # Convert into a temporary sibling first so the copied original is
        # only replaced once the conversion succeeded.
        temp_pdf = dest_pdf.with_suffix(".tmp.pdf")
        if fix_pdf(pdf, temp_pdf):
            temp_pdf.replace(dest_pdf)
            print(f"✅ Fixed: {relative}")
            fixed += 1
        else:
            if temp_pdf.exists():
                temp_pdf.unlink()
            print(f"❌ Failed: {relative}")
            failed += 1

    print(f"\nFixed: {fixed} | Failed: {failed}")
    print(f"Output: {dest}/")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment