leodr · February 14, 2026 19:40
diff --git a/auto-rename.py b/auto-rename.py
 import argparse
 import shutil
 from pathlib import Path

 import fitz  # PyMuPDF
 import lmstudio as lms
 from pydantic import BaseModel

 # Lower-case file suffixes (leading dot included) that are treated as images.
 # main() lower-cases the input file's suffix before testing membership here.
 IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}


 class FileInfoSchema(BaseModel):
    # Structured fields requested from the LLM for a PDF document. The class is
    # passed as `response_format` in extract_file_info() and its values are
    # turned into a filename by construct_filename().
    document_type: str  # kind of document, e.g. "Rechnung" (see the prompt in extract_file_info)
    title: str  # short title identifying the document
    year: int  # year/month/day are formatted as YYYY-MM-DD by construct_filename()
    month: int
    day: int


 class ImageCaptionSchema(BaseModel):
    # Structured response requested from the LLM for an image. The class is
    # passed as `response_format` in extract_image_caption().
    caption: str  # short description; sanitised into the filename by construct_image_filename()


 def pdf_to_image_files(
    pdf_path: Path, temp_dir: Path, max_size: int = 1500
 ) -> list[Path]:
    """Render every page of a PDF to a JPEG file inside ``temp_dir``.

    Each page is scaled uniformly so that its longer side becomes
    ``max_size``.

    Args:
        pdf_path: PDF document to render.
        temp_dir: Directory that receives the ``page_NNNN.jpg`` files; it is
            not created here.
        max_size: Target length of the longer page side after scaling.

    Returns:
        Paths of the written images, in page order.
    """
    image_paths = []
    # The context manager closes the document even when rendering or saving a
    # page raises; previously it was only closed on the success path.
    with fitz.open(pdf_path) as pdf_document:
        for page_num, page in enumerate(pdf_document):
            # Zoom factor that maps the longer page side onto max_size.
            rect = page.rect
            scale = max_size / max(rect.width, rect.height)
            pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale))

            # Save directly from the pixmap (no PIL round-trip).
            image_path = temp_dir / f"page_{page_num:04d}.jpg"
            pix.save(image_path)
            image_paths.append(image_path)
    return image_paths


 def extract_image_caption(file_path: Path) -> ImageCaptionSchema:
    """Ask the LLM for a short, filename-friendly caption of an image.

    Args:
        file_path: Image file to describe.

    Returns:
        The structured response; a dict response is converted into an
        ImageCaptionSchema, anything else is returned as received.
    """
    llm = lms.llm("mistralai/ministral-3-3b")
    conversation = lms.Chat()

    instructions = (
        "Generate a short caption (max 5-7 words) that describes this image. "
        "The caption should be concise and descriptive, suitable for use as a filename."
    )

    # Attach the image to the single user message that carries the prompt.
    conversation.add_user_message(
        instructions, images=[lms.prepare_image(str(file_path))]
    )

    result = llm.respond(conversation, response_format=ImageCaptionSchema).parsed
    return ImageCaptionSchema(**result) if isinstance(result, dict) else result


 def extract_file_info(file_path: Path) -> FileInfoSchema:
    """Extract structured document info from a PDF using the LLM.

    Every page is rendered to an image via pdf_to_image_files(), and all page
    images are sent together with the extraction prompt in one user message.

    Args:
        file_path: PDF file to analyse.

    Returns:
        The structured response; a dict response is converted into a
        FileInfoSchema, anything else is returned as received.
    """
    import tempfile  # function-scope import: only needed for the scratch directory

    model = lms.llm("mistralai/ministral-3-3b")
    chat = lms.Chat()

    prompt = (
        "Extract the following information from this document:\n"
        "- document_type: The type of document (e.g. Rechnung, Vertrag, Brief, Bescheid, Kontoauszug, Rezept, Arztbrief, Gutschrift, Mahnung, Angebot, Lieferschein, Quittung, Zertifikat, etc.)\n"
        "- title: A very short title (max 3-5 words) identifying the document, e.g. 'Hausarztpraxis München' or 'Amazon Bestellung'\n"
        "- year: The year mentioned or relevant to the document\n"
        "- month: The month (1-12)\n"
        "- day: The day of the month (1-31)\n\n"
        "If any date information is not available, make a reasonable guess based on context."
    )

    # Uniquely named scratch directory next to the PDF. A fixed ".tmp_<stem>"
    # name combined with exist_ok=True and rmtree() could delete a directory
    # that already existed, or clash with a concurrent run on the same file.
    # The context manager removes the directory on exit, including on error.
    with tempfile.TemporaryDirectory(
        prefix=f".tmp_{file_path.stem}_", dir=file_path.parent
    ) as temp_name:
        image_paths = pdf_to_image_files(file_path, Path(temp_name))
        image_handles = [lms.prepare_image(str(p)) for p in image_paths]
        chat.add_user_message(prompt, images=image_handles)

        prediction = model.respond(chat, response_format=FileInfoSchema)
        parsed = prediction.parsed
        if isinstance(parsed, dict):
            return FileInfoSchema(**parsed)
        return parsed


 def sanitize_filename(s: str) -> str:
    """Return ``s`` with characters that are unsafe in a filename removed.

    Path separators, Windows-reserved punctuation and dots are dropped.
    Line breaks and tabs become a single space so words stay separated, and
    all other ASCII control characters (including NUL) are dropped. Leading
    and trailing whitespace is stripped from the result.

    Args:
        s: Raw text for one filename component, e.g. LLM output.

    Returns:
        The cleaned component; may be empty if nothing usable remains.
    """
    # Forbidden in filenames (Unix: /, Windows: \ / : * ? " < > |). The dot is
    # removed too, presumably so the text cannot introduce an extension.
    forbidden = set('/\\:*?"<>|.')
    kept = []
    for c in s:
        if c in forbidden:
            continue
        if c in "\t\n\r\v\f":
            # Preserve the word boundary that the line break or tab marked.
            kept.append(" ")
        elif ord(c) < 32 or ord(c) == 127:
            # Remaining ASCII control characters are dropped outright.
            continue
        else:
            kept.append(c)
    return "".join(kept).strip()


 def construct_filename(info: FileInfoSchema, original_path: Path) -> str:
    """Build the new filename for a PDF from the extracted info.

    The name has the form ``YYYY-MM-DD <type> <title><suffix>``. Type and
    title are passed through sanitize_filename(); a component that ends up
    empty is left out.

    Args:
        info: Extracted document type, title and date.
        original_path: Path of the file being renamed; only its suffix is used.

    Returns:
        The new filename (no directory part).
    """
    date_str = f"{info.year:04d}-{info.month:02d}-{info.day:02d}"
    components = [
        date_str,
        sanitize_filename(info.document_type),
        sanitize_filename(info.title),
    ]
    # Skip empty components so the name never contains doubled spaces or a
    # space directly before the suffix (e.g. "2024-01-01 Rechnung .pdf").
    stem = " ".join(part for part in components if part)
    return f"{stem}{original_path.suffix}"


 def construct_image_filename(caption: ImageCaptionSchema, original_path: Path) -> str:
    """Build the new filename for an image from its caption.

    Args:
        caption: Caption produced for the image.
        original_path: Path of the file being renamed; its suffix is kept and
            its stem is the fallback when the caption is unusable.

    Returns:
        The new filename (no directory part).
    """
    safe_caption = sanitize_filename(caption.caption)
    if not safe_caption:
        # An empty caption would produce a bare ".ext" name; keep the
        # original stem instead.
        safe_caption = original_path.stem
    return f"{safe_caption}{original_path.suffix}"


 def main() -> int:
    """Rename a single image or PDF according to LLM-extracted content.

    Parses the command line, validates the input path, obtains a caption
    (images) or document info (PDFs) and renames the file inside its current
    directory.

    Returns:
        Exit status: 0 if the file was renamed or already carries the
        generated name, 1 on any reported error.
    """
    import sys  # function-scope import: needed only for the stderr stream

    parser = argparse.ArgumentParser(
        description="Rename an image or PDF file based on extracted content."
    )
    parser.add_argument("file", type=str, help="Path to the image or PDF file")
    args = parser.parse_args()

    file_path = Path(args.file).resolve()

    if not file_path.exists():
        print(f"Error: File '{file_path}' does not exist.", file=sys.stderr)
        return 1

    # A directory named like "scans.pdf" would pass the suffix test below, so
    # rule it out before handing the path to the extractors.
    if not file_path.is_file():
        print(f"Error: '{file_path}' is not a regular file.", file=sys.stderr)
        return 1

    suffix = file_path.suffix.lower()
    is_image = suffix in IMAGE_EXTENSIONS
    if not is_image and suffix != ".pdf":
        print(
            f"Error: File '{file_path}' is not a supported image or PDF file.",
            file=sys.stderr,
        )
        return 1

    print(f"Processing: {file_path}")

    if is_image:
        caption = extract_image_caption(file_path)
        print(f"Generated caption: {caption.caption}")
        new_filename = construct_image_filename(caption, file_path)
    else:
        info = extract_file_info(file_path)
        print(f"Extracted info: {info}")
        new_filename = construct_filename(info, file_path)
    new_path = file_path.parent / new_filename

    if new_path == file_path:
        # Without this check the exists() test below would report the file as
        # conflicting with itself.
        print(f"File already has the generated name: {file_path}")
        return 0

    if new_path.exists():
        print(f"Error: Target file '{new_path}' already exists.", file=sys.stderr)
        return 1

    file_path.rename(new_path)
    print(f"Renamed to: {new_path}")
    return 0


 if __name__ == "__main__":
    # The builtin `exit` is added by the `site` module for interactive use and
    # is absent when site is disabled; raising SystemExit always works and
    # passes main()'s return value on as the process exit status.
    raise SystemExit(main())
	import argparse
	import shutil
	from pathlib import Path

	import fitz # PyMuPDF
	import lmstudio as lms
	from pydantic import BaseModel

	# Lower-case file suffixes (leading dot included) that are treated as images.
	# main() lower-cases the input file's suffix before testing membership here.
	IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"}


	class FileInfoSchema(BaseModel):
	    # Structured fields requested from the LLM for a PDF document. The class
	    # is passed as `response_format` in extract_file_info() and its values
	    # are turned into a filename by construct_filename().
	    document_type: str  # kind of document, e.g. "Rechnung"
	    title: str  # short title identifying the document
	    year: int  # year/month/day are formatted as YYYY-MM-DD by construct_filename()
	    month: int
	    day: int


	class ImageCaptionSchema(BaseModel):
	    # Structured response requested from the LLM for an image. The class is
	    # passed as `response_format` in extract_image_caption().
	    caption: str  # short description; sanitised into the filename by construct_image_filename()


	def pdf_to_image_files(
	    pdf_path: Path, temp_dir: Path, max_size: int = 1500
	) -> list[Path]:
	    """Render every page of a PDF to a JPEG file inside ``temp_dir``.

	    Each page is scaled uniformly so that its longer side becomes
	    ``max_size``.

	    Args:
	        pdf_path: PDF document to render.
	        temp_dir: Directory that receives the ``page_NNNN.jpg`` files; it is
	            not created here.
	        max_size: Target length of the longer page side after scaling.

	    Returns:
	        Paths of the written images, in page order.
	    """
	    image_paths = []
	    # The context manager closes the document even when rendering or saving
	    # a page raises; previously it was only closed on the success path.
	    with fitz.open(pdf_path) as pdf_document:
	        for page_num, page in enumerate(pdf_document):
	            # Zoom factor that maps the longer page side onto max_size.
	            rect = page.rect
	            scale = max_size / max(rect.width, rect.height)
	            pix = page.get_pixmap(matrix=fitz.Matrix(scale, scale))

	            # Save directly from the pixmap (no PIL round-trip).
	            image_path = temp_dir / f"page_{page_num:04d}.jpg"
	            pix.save(image_path)
	            image_paths.append(image_path)
	    return image_paths


	def extract_image_caption(file_path: Path) -> ImageCaptionSchema:
	    """Ask the LLM for a short, filename-friendly caption of an image.

	    Args:
	        file_path: Image file to describe.

	    Returns:
	        The structured response; a dict response is converted into an
	        ImageCaptionSchema, anything else is returned as received.
	    """
	    model = lms.llm("mistralai/ministral-3-3b")
	    chat = lms.Chat()

	    prompt = (
	        "Generate a short caption (max 5-7 words) that describes this image. "
	        "The caption should be concise and descriptive, suitable for use as a filename."
	    )

	    # Attach the image to the single user message that carries the prompt.
	    image_handle = lms.prepare_image(str(file_path))
	    chat.add_user_message(prompt, images=[image_handle])

	    prediction = model.respond(chat, response_format=ImageCaptionSchema)
	    parsed = prediction.parsed
	    if isinstance(parsed, dict):
	        return ImageCaptionSchema(**parsed)
	    return parsed


	def extract_file_info(file_path: Path) -> FileInfoSchema:
	    """Extract structured document info from a PDF using the LLM.

	    Every page is rendered to an image via pdf_to_image_files(), and all
	    page images are sent together with the extraction prompt in one user
	    message.

	    Args:
	        file_path: PDF file to analyse.

	    Returns:
	        The structured response; a dict response is converted into a
	        FileInfoSchema, anything else is returned as received.
	    """
	    import tempfile  # function-scope import: only needed for the scratch directory

	    model = lms.llm("mistralai/ministral-3-3b")
	    chat = lms.Chat()

	    prompt = (
	        "Extract the following information from this document:\n"
	        "- document_type: The type of document (e.g. Rechnung, Vertrag, Brief, Bescheid, Kontoauszug, Rezept, Arztbrief, Gutschrift, Mahnung, Angebot, Lieferschein, Quittung, Zertifikat, etc.)\n"
	        "- title: A very short title (max 3-5 words) identifying the document, e.g. 'Hausarztpraxis München' or 'Amazon Bestellung'\n"
	        "- year: The year mentioned or relevant to the document\n"
	        "- month: The month (1-12)\n"
	        "- day: The day of the month (1-31)\n\n"
	        "If any date information is not available, make a reasonable guess based on context."
	    )

	    # Uniquely named scratch directory next to the PDF. A fixed ".tmp_<stem>"
	    # name combined with exist_ok=True and rmtree() could delete a directory
	    # that already existed, or clash with a concurrent run on the same file.
	    # The context manager removes the directory on exit, including on error.
	    with tempfile.TemporaryDirectory(
	        prefix=f".tmp_{file_path.stem}_", dir=file_path.parent
	    ) as temp_name:
	        image_paths = pdf_to_image_files(file_path, Path(temp_name))
	        image_handles = [lms.prepare_image(str(p)) for p in image_paths]
	        chat.add_user_message(prompt, images=image_handles)

	        prediction = model.respond(chat, response_format=FileInfoSchema)
	        parsed = prediction.parsed
	        if isinstance(parsed, dict):
	            return FileInfoSchema(**parsed)
	        return parsed


	def sanitize_filename(s: str) -> str:
	    """Return ``s`` with characters that are unsafe in a filename removed.

	    Path separators, Windows-reserved punctuation and dots are dropped.
	    Line breaks and tabs become a single space so words stay separated, and
	    all other ASCII control characters (including NUL) are dropped. Leading
	    and trailing whitespace is stripped from the result.

	    Args:
	        s: Raw text for one filename component, e.g. LLM output.

	    Returns:
	        The cleaned component; may be empty if nothing usable remains.
	    """
	    # Forbidden in filenames (Unix: /, Windows: \ / : * ? " < > |). The dot
	    # is removed too, presumably so the text cannot introduce an extension.
	    # The pipe is written unescaped: "\|" is not a valid Python escape.
	    forbidden = set('/\\:*?"<>|.')
	    kept = []
	    for c in s:
	        if c in forbidden:
	            continue
	        if c in "\t\n\r\v\f":
	            # Preserve the word boundary that the line break or tab marked.
	            kept.append(" ")
	        elif ord(c) < 32 or ord(c) == 127:
	            # Remaining ASCII control characters are dropped outright.
	            continue
	        else:
	            kept.append(c)
	    return "".join(kept).strip()


	def construct_filename(info: FileInfoSchema, original_path: Path) -> str:
	    """Build the new filename for a PDF from the extracted info.

	    The name has the form ``YYYY-MM-DD <type> <title><suffix>``. Type and
	    title are passed through sanitize_filename(); a component that ends up
	    empty is left out.

	    Args:
	        info: Extracted document type, title and date.
	        original_path: Path of the file being renamed; only its suffix is used.

	    Returns:
	        The new filename (no directory part).
	    """
	    date_str = f"{info.year:04d}-{info.month:02d}-{info.day:02d}"
	    components = [
	        date_str,
	        sanitize_filename(info.document_type),
	        sanitize_filename(info.title),
	    ]
	    # Skip empty components so the name never contains doubled spaces or a
	    # space directly before the suffix (e.g. "2024-01-01 Rechnung .pdf").
	    stem = " ".join(part for part in components if part)
	    return f"{stem}{original_path.suffix}"


	def construct_image_filename(caption: ImageCaptionSchema, original_path: Path) -> str:
	    """Build the new filename for an image from its caption.

	    Args:
	        caption: Caption produced for the image.
	        original_path: Path of the file being renamed; its suffix is kept
	            and its stem is the fallback when the caption is unusable.

	    Returns:
	        The new filename (no directory part).
	    """
	    safe_caption = sanitize_filename(caption.caption)
	    if not safe_caption:
	        # An empty caption would produce a bare ".ext" name; keep the
	        # original stem instead.
	        safe_caption = original_path.stem
	    return f"{safe_caption}{original_path.suffix}"


	def main() -> int:
	    """Rename a single image or PDF according to LLM-extracted content.

	    Parses the command line, validates the input path, obtains a caption
	    (images) or document info (PDFs) and renames the file inside its
	    current directory.

	    Returns:
	        Exit status: 0 if the file was renamed or already carries the
	        generated name, 1 on any reported error.
	    """
	    import sys  # function-scope import: needed only for the stderr stream

	    parser = argparse.ArgumentParser(
	        description="Rename an image or PDF file based on extracted content."
	    )
	    parser.add_argument("file", type=str, help="Path to the image or PDF file")
	    args = parser.parse_args()

	    file_path = Path(args.file).resolve()

	    if not file_path.exists():
	        print(f"Error: File '{file_path}' does not exist.", file=sys.stderr)
	        return 1

	    # A directory named like "scans.pdf" would pass the suffix test below,
	    # so rule it out before handing the path to the extractors.
	    if not file_path.is_file():
	        print(f"Error: '{file_path}' is not a regular file.", file=sys.stderr)
	        return 1

	    suffix = file_path.suffix.lower()
	    is_image = suffix in IMAGE_EXTENSIONS
	    if not is_image and suffix != ".pdf":
	        print(
	            f"Error: File '{file_path}' is not a supported image or PDF file.",
	            file=sys.stderr,
	        )
	        return 1

	    print(f"Processing: {file_path}")

	    if is_image:
	        caption = extract_image_caption(file_path)
	        print(f"Generated caption: {caption.caption}")
	        new_filename = construct_image_filename(caption, file_path)
	    else:
	        info = extract_file_info(file_path)
	        print(f"Extracted info: {info}")
	        new_filename = construct_filename(info, file_path)
	    new_path = file_path.parent / new_filename

	    if new_path == file_path:
	        # Without this check the exists() test below would report the file
	        # as conflicting with itself.
	        print(f"File already has the generated name: {file_path}")
	        return 0

	    if new_path.exists():
	        print(f"Error: Target file '{new_path}' already exists.", file=sys.stderr)
	        return 1

	    file_path.rename(new_path)
	    print(f"Renamed to: {new_path}")
	    return 0


	if __name__ == "__main__":
	    # The builtin `exit` is added by the `site` module for interactive use
	    # and is absent when site is disabled; raising SystemExit always works
	    # and passes main()'s return value on as the process exit status.
	    raise SystemExit(main())
No results found