Skip to content

Instantly share code, notes, and snippets.

@leodr
Created February 14, 2026 19:40
Show Gist options
  • Select an option

  • Save leodr/1e72fdda17cdb5acd1b62bae7449ec1f to your computer and use it in GitHub Desktop.

Select an option

Save leodr/1e72fdda17cdb5acd1b62bae7449ec1f to your computer and use it in GitHub Desktop.
import argparse
import shutil
import tempfile
from pathlib import Path

import fitz  # PyMuPDF
import lmstudio as lms
from pydantic import BaseModel
# Lower-case file suffixes (including the leading dot) that are handled as
# images; anything else must be a PDF to be accepted.
IMAGE_EXTENSIONS = {
    ".bmp",
    ".gif",
    ".jpeg",
    ".jpg",
    ".png",
    ".tiff",
    ".webp",
}
# Structured answer requested from the LLM for a PDF document.
# NOTE: documented with comments rather than a class docstring, because
# pydantic would surface a docstring as the "description" of the JSON schema
# that is handed to the model as its response format.
class FileInfoSchema(BaseModel):
    # Kind of document, e.g. "Rechnung" or "Vertrag" (free text).
    document_type: str
    # Very short title identifying the document.
    title: str
    # Date the document refers to; the prompt asks for month 1-12 and
    # day 1-31, but the schema itself does not enforce those ranges.
    year: int
    month: int
    day: int
# Structured answer requested from the LLM for an image.
# NOTE: documented with comments rather than a class docstring, because
# pydantic would surface a docstring as the "description" of the JSON schema
# that is handed to the model as its response format.
class ImageCaptionSchema(BaseModel):
    # Short free-text description of the image, later used as the file name.
    caption: str
def pdf_to_image_files(
    pdf_path: Path, temp_dir: Path, max_size: int = 1500
) -> list[Path]:
    """Render every page of a PDF to a JPEG file inside ``temp_dir``.

    Each page is scaled uniformly so that its longer side becomes
    ``max_size`` pixels; small pages are scaled up, large ones down.

    Args:
        pdf_path: PDF file to render.
        temp_dir: Existing directory that receives the page images.
        max_size: Target length in pixels of each page's longer side.

    Returns:
        Paths of the written images in page order, named
        ``page_0000.jpg``, ``page_0001.jpg``, and so on.
    """
    image_paths: list[Path] = []
    # The context manager closes the document even when rendering or saving
    # one of the pages raises, so the file handle is never leaked.
    with fitz.open(pdf_path) as pdf_document:
        for page_num, page in enumerate(pdf_document):
            # Zoom factor that maps the page's longer side onto max_size.
            rect = page.rect
            scale = max_size / max(rect.width, rect.height)
            pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale))
            # Save straight from the pixmap; no round trip through PIL.
            image_path = temp_dir / f"page_{page_num:04d}.jpg"
            pix.save(image_path)
            image_paths.append(image_path)
    return image_paths
def extract_image_caption(file_path: Path) -> ImageCaptionSchema:
    """Ask the LLM for a brief, filename-friendly caption of an image file."""
    llm = lms.llm("mistralai/ministral-3-3b")
    conversation = lms.Chat()
    instructions = (
        "Generate a short caption (max 5-7 words) that describes this image. "
        "The caption should be concise and descriptive, suitable for use as a filename."
    )
    picture = lms.prepare_image(str(file_path))
    conversation.add_user_message(instructions, images=[picture])
    result = llm.respond(conversation, response_format=ImageCaptionSchema).parsed
    # A plain dict is wrapped in the schema class; anything else is passed
    # through unchanged.
    if not isinstance(result, dict):
        return result
    return ImageCaptionSchema(**result)
def extract_file_info(file_path: Path) -> FileInfoSchema:
    """Extract structured info (type, title, date) from a PDF using the LLM.

    The PDF pages are rendered to images in a scratch directory next to the
    PDF, sent to the model together with the extraction prompt, and the
    scratch directory is removed again before returning.

    Args:
        file_path: PDF file to analyse.

    Returns:
        The model's answer as a ``FileInfoSchema``.
    """
    model = lms.llm("mistralai/ministral-3-3b")
    chat = lms.Chat()
    prompt = (
        "Extract the following information from this document:\n"
        "- document_type: The type of document (e.g. Rechnung, Vertrag, Brief, Bescheid, Kontoauszug, Rezept, Arztbrief, Gutschrift, Mahnung, Angebot, Lieferschein, Quittung, Zertifikat, etc.)\n"
        "- title: A very short title (max 3-5 words) identifying the document, e.g. 'Hausarztpraxis München' or 'Amazon Bestellung'\n"
        "- year: The year mentioned or relevant to the document\n"
        "- month: The month (1-12)\n"
        "- day: The day of the month (1-31)\n\n"
        "If any date information is not available, make a reasonable guess based on context."
    )
    # Use a uniquely named scratch directory in the same folder as the PDF.
    # A fixed ".tmp_<stem>" name could collide with an existing directory,
    # whose contents would then be deleted by the cleanup.
    with tempfile.TemporaryDirectory(
        prefix=".tmp_pdf_pages_", dir=file_path.parent
    ) as scratch_dir:
        image_paths = pdf_to_image_files(file_path, Path(scratch_dir))
        image_handles = [lms.prepare_image(str(p)) for p in image_paths]
        chat.add_user_message(prompt, images=image_handles)
        # Keep the page images on disk until the model has answered.
        prediction = model.respond(chat, response_format=FileInfoSchema)
    parsed = prediction.parsed
    # A plain dict is wrapped in the schema class; anything else is passed
    # through unchanged.
    if isinstance(parsed, dict):
        return FileInfoSchema(**parsed)
    return parsed
def sanitize_filename(s: str) -> str:
    """Make a string safe to use as one component of a file name.

    Removes the characters that are forbidden in file names on Unix or
    Windows (``/ \\ : * ? " < > |``) as well as ``.``. Whitespace control
    characters (tab, newline, carriage return, vertical tab, form feed)
    become a plain space so that words stay separated; every other ASCII
    control character, including NUL, is dropped. Leading and trailing
    whitespace is stripped from the result.

    Args:
        s: Raw text, e.g. a title or caption produced by the LLM.

    Returns:
        The cleaned text; may be empty.
    """
    forbidden = set('/\\:*?"<>|.')
    whitespace_controls = set("\t\n\r\v\f")
    cleaned = []
    for c in s:
        if c in forbidden:
            continue
        if c in whitespace_controls:
            cleaned.append(" ")
        elif ord(c) < 32 or ord(c) == 127:
            # Remaining control characters (NUL, ESC, DEL, ...) are invalid
            # in Windows file names, and NUL makes the rename itself fail.
            continue
        else:
            cleaned.append(c)
    return "".join(cleaned).strip()
def construct_filename(info: FileInfoSchema, original_path: Path) -> str:
    """Construct a new filename from the extracted info (for PDFs).

    The result has the form ``YYYY-MM-DD <type> <title><suffix>``, where the
    type and title are sanitized and the suffix is taken unchanged from
    ``original_path``. A type or title that is empty after sanitizing is
    left out, so the name never contains doubled spaces or a space directly
    before the suffix.

    Args:
        info: Extracted document type, title and date.
        original_path: Path of the file being renamed; only its suffix is used.

    Returns:
        The new file name (without any directory part).
    """
    date_str = f"{info.year:04d}-{info.month:02d}-{info.day:02d}"
    components = (
        date_str,
        sanitize_filename(info.document_type),
        sanitize_filename(info.title),
    )
    # Join only the non-empty components.
    stem = " ".join(part for part in components if part)
    return f"{stem}{original_path.suffix}"
def construct_image_filename(caption: ImageCaptionSchema, original_path: Path) -> str:
    """Construct a new filename from the caption (for images).

    The sanitized caption becomes the file stem and the suffix is taken
    unchanged from ``original_path``. If nothing is left of the caption
    after sanitizing, the original stem is kept, so the result is never a
    bare suffix such as ``.jpg`` (which would be a hidden file on Unix).

    Args:
        caption: Caption generated for the image.
        original_path: Path of the file being renamed.

    Returns:
        The new file name (without any directory part).
    """
    safe_caption = sanitize_filename(caption.caption)
    if not safe_caption:
        safe_caption = original_path.stem
    return f"{safe_caption}{original_path.suffix}"
def main() -> int:
    """Rename the image or PDF given on the command line from its content.

    Images are renamed to an LLM-generated caption; PDFs are renamed to
    ``<date> <type> <title>`` as extracted by the LLM. The file stays in
    its directory and keeps its suffix.

    Returns:
        0 on success (including when the file already has the generated
        name), 1 when the input is missing, not a regular file, of an
        unsupported type, or the target name is taken by another file.
    """
    parser = argparse.ArgumentParser(
        description="Rename an image or PDF file based on extracted content."
    )
    parser.add_argument("file", type=str, help="Path to the image or PDF file")
    args = parser.parse_args()
    file_path = Path(args.file).resolve()
    if not file_path.exists():
        print(f"Error: File '{file_path}' does not exist.")
        return 1
    # A directory named like "scan.pdf" would pass the suffix check below
    # and only fail later inside the extraction code.
    if not file_path.is_file():
        print(f"Error: '{file_path}' is not a regular file.")
        return 1
    suffix = file_path.suffix.lower()
    if suffix not in IMAGE_EXTENSIONS and suffix != ".pdf":
        print(f"Error: File '{file_path}' is not a supported image or PDF file.")
        return 1
    print(f"Processing: {file_path}")
    if suffix in IMAGE_EXTENSIONS:
        caption = extract_image_caption(file_path)
        print(f"Generated caption: {caption.caption}")
        new_filename = construct_image_filename(caption, file_path)
    else:
        info = extract_file_info(file_path)
        print(f"Extracted info: {info}")
        new_filename = construct_filename(info, file_path)
    new_path = file_path.parent / new_filename
    # Re-running on an already renamed file can produce the same name again;
    # treat that as success instead of reporting a name collision.
    if new_path == file_path:
        print(f"File already has the generated name: {file_path}")
        return 0
    if new_path.exists():
        print(f"Error: Target file '{new_path}' already exists.")
        return 1
    file_path.rename(new_path)
    print(f"Renamed to: {new_path}")
    return 0
# Script entry point: pass main()'s return value on as the process exit code.
# SystemExit is raised directly because the bare `exit` helper is injected by
# the `site` module for interactive use and is not guaranteed to exist
# (for example under `python -S`).
if __name__ == "__main__":
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment