Skip to content

Instantly share code, notes, and snippets.

@oscmansan
Last active February 8, 2026 18:35
Show Gist options
  • Select an option

  • Save oscmansan/c21052f4331861f7208adb72ecaac08a to your computer and use it in GitHub Desktop.

Select an option

Save oscmansan/c21052f4331861f7208adb72ecaac08a to your computer and use it in GitHub Desktop.
Fix PDF font embedding issues in thesis figures for submission to Papyrus @ UdeM. Requires pdffonts (poppler) and gs (ghostscript).
"""Check and fix PDF font embedding issues in thesis figures."""
import argparse
import shutil
import subprocess
from pathlib import Path
def check_pdf_fonts(pdf_path: str) -> list[str]:
    """Check a PDF for font issues by parsing the output of ``pdffonts``.

    Args:
        pdf_path: Path of the PDF file to inspect.

    Returns:
        One problem description per finding, in row order, so the same
        description can appear several times: ``"Type 3 (bitmap)"``,
        ``"unnamed font"`` or ``"not embedded"``. An empty list means no
        issue was detected. If ``pdffonts`` cannot be started, times out or
        exits with a non-zero status, a single ``"error: ..."`` entry is
        returned instead.
    """
    try:
        result = subprocess.run(
            ["pdffonts", pdf_path],
            capture_output=True, text=True, timeout=30,
        )
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        # Missing executable, undecodable output, timeout, ...: report the
        # failure as a problem instead of aborting the whole scan.
        return [f"error: {e}"]
    if result.returncode != 0:
        return [f"error: {result.stderr.strip()}"]

    problems = []
    for line in result.stdout.split("\n")[2:]:  # Skip the two header lines
        parts = line.split()
        if not parts:
            continue
        if "Type 3" in line:
            problems.append("Type 3 (bitmap)")
        if parts[0] == "[none]":
            problems.append("unnamed font")
        # Row layout: name type [encoding] emb sub uni <obj> <gen>.
        # The object ID is printed as two tokens, so counting from the end
        # (font names and types may contain spaces) "emb" is parts[-5];
        # parts[-4] is "sub" and would flag fully embedded, non-subset fonts.
        if len(parts) >= 7 and parts[-5] == "no":
            problems.append("not embedded")
    return problems
def fix_pdf(input_path: Path, output_path: Path) -> bool:
    """Rewrite a PDF with Ghostscript so that its fonts become outlines.

    The parent directory of *output_path* is created when missing.

    Args:
        input_path: PDF file to read.
        output_path: Where Ghostscript should write the converted PDF.

    Returns:
        True when ``gs`` ran and exited with status 0; False when it exited
        with another status or could not be run at all (including a timeout).
    """
    output_path.parent.mkdir(parents=True, exist_ok=True)
    command = [
        "gs", "-dNOPAUSE", "-dBATCH", "-sDEVICE=pdfwrite",
        "-dNoOutputFonts",  # emit glyph outlines instead of font objects
        f"-sOutputFile={output_path}",
        str(input_path),
    ]
    try:
        completed = subprocess.run(command, capture_output=True, timeout=60)
    except Exception:
        # Best effort: any failure to run gs is reported as "not fixed".
        return False
    return completed.returncode == 0
def main(argv=None) -> None:
    """Check figure PDFs for font problems and optionally write a fixed copy.

    Every ``*.pdf`` located below a ``figures`` directory inside the source
    directory is checked with ``check_pdf_fonts`` and a per-file report plus
    a summary is printed. With ``--fix``, the whole source tree is copied to
    the destination (replacing any existing destination) and each problematic
    PDF in the copy is replaced by the output of ``fix_pdf``.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read them from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="Check/fix PDF font embedding issues")
    parser.add_argument("--fix", action="store_true", help="Create fixed copy")
    parser.add_argument("--source", default="PhD_Thesis", help="Source directory")
    parser.add_argument("--dest", default=None, help="Destination (default: <source>_fixed)")
    args = parser.parse_args(argv)

    source = Path(args.source)
    dest = Path(args.dest or f"{args.source}_fixed")
    if not source.exists():
        print(f"Error: '{source}' not found")
        return

    if args.fix:
        # The fix step removes an existing destination before copying, so a
        # destination that is the source itself (or contains it) would delete
        # the very files we are about to copy. Refuse before touching anything.
        source_resolved = source.resolve()
        dest_resolved = dest.resolve()
        if dest_resolved == source_resolved or dest_resolved in source_resolved.parents:
            print(
                f"Error: destination '{dest}' must not be the source "
                "directory or one of its parents"
            )
            return

    # Find all PDFs in figures folders. "**" also matches zero directories,
    # so this covers files directly inside a figures folder as well; set()
    # guards against duplicates from nested "figures" directories.
    pdfs = sorted(set(source.glob("**/figures/**/*.pdf")))
    print(f"Checking {len(pdfs)} PDFs in {source}/\n")

    # Check each PDF
    problematic = {}
    for pdf in pdfs:
        issues = check_pdf_fonts(str(pdf))
        if issues:
            problematic[pdf] = issues
            print(f"❌ {pdf.relative_to(source)}")
            # De-duplicate per-font findings; sort for a stable report.
            for issue in sorted(set(issues)):
                print(f" └── {issue}")
        else:
            print(f"✅ {pdf.relative_to(source)}")

    # Summary
    print(f"\n{'='*50}")
    print(f"Clean: {len(pdfs) - len(problematic)} | Problematic: {len(problematic)}")
    if not args.fix or not problematic:
        if problematic and not args.fix:
            print(f"\nRun with --fix to create fixed copy at {dest}/")
        return

    # Create fixed copy
    print(f"\nCreating fixed copy at {dest}/")
    if dest.exists():
        shutil.rmtree(dest)
    shutil.copytree(source, dest)

    # Fix problematic PDFs
    fixed, failed = 0, 0
    for pdf in problematic:
        relative = pdf.relative_to(source)
        dest_pdf = dest / relative
        # Convert into a temporary sibling first so the copied original is
        # only overwritten once the conversion has succeeded.
        temp_pdf = dest_pdf.with_suffix(".tmp.pdf")
        if fix_pdf(pdf, temp_pdf):
            temp_pdf.replace(dest_pdf)
            print(f"✅ Fixed: {relative}")
            fixed += 1
        else:
            temp_pdf.unlink(missing_ok=True)
            print(f"❌ Failed: {relative}")
            failed += 1

    print(f"\nFixed: {fixed} | Failed: {failed}")
    print(f"Output: {dest}/")
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment