Created
December 13, 2025 18:29
-
-
Save gregmuellegger/3699d8ffb26ea39fb617c6e153f1775f to your computer and use it in GitHub Desktop.
Scripts to convert PDFs to JPG and to set the EXIF date and description based on the filename and OCR text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Sets EXIF date on an image based on YYYY-MM-DD pattern in filename or manual date.

set -euo pipefail

# Print the usage text to stdout. Error paths redirect it to stderr themselves.
show_help() {
  # Script name without its directory; parameter expansion avoids forking
  # `basename` once per usage/example line.
  local prog="${0##*/}"

  cat <<EOF
Usage: ${prog} [OPTIONS] <image>

Sets EXIF date on an image file. Extracts date from YYYY-MM-DD pattern in
filename, or uses manually specified date. Skips if EXIF date already exists.

Options:
  -d, --date YYYY-MM-DD  Set date manually instead of extracting from filename
  -t, --time TIME        Set time as HH or HH:MM in 24h format (default: 12:00)
  -f, --force            Overwrite existing EXIF date
  -h, --help             Show this help message

Timezone: Europe/Berlin

Examples:
  ${prog} 2024-05-18-vacation.jpg          # Extract date, use 12:00
  ${prog} -t 18 2024-05-18-vacation.jpg    # Set time to 18:00
  ${prog} -t 9:15 2024-05-18-vacation.jpg  # Set time to 09:15
  ${prog} -d 2024-05-18 photo.jpg          # Set date manually
  ${prog} -f 2024-05-18-photo.jpg          # Overwrite existing date
EOF
}
# Option defaults; `input` is intentionally left unset until a positional
# argument is seen.
manual_date=""
manual_time=""
force=false

# Abort with a readable message when a value-taking option is the last
# argument. Without this check, reading "$2" under `set -u` ends the script
# with a bare "unbound variable" error.
#   $1 - option as typed by the user
#   $2 - number of arguments left, including the option itself
require_value() {
  if (( $2 < 2 )); then
    echo "Error: Option $1 requires an argument" >&2
    exit 1
  fi
}

# Parse the command line into manual_date, manual_time, force and input.
# When several positional arguments are given, the last one wins.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      -d|--date)
        require_value "$1" "$#"
        manual_date="$2"
        shift 2
        ;;
      -t|--time)
        require_value "$1" "$#"
        manual_time="$2"
        shift 2
        ;;
      -f|--force)
        force=true
        shift
        ;;
      -*)
        echo "Error: Unknown option: $1" >&2
        show_help >&2
        exit 1
        ;;
      *)
        input="$1"
        shift
        ;;
    esac
  done
}

parse_args "$@"
# A target image is mandatory.
[[ -n "${input:-}" ]] || {
  echo "Error: No input file specified" >&2
  show_help >&2
  exit 1
}

# It also has to be an existing regular file.
[[ -f "$input" ]] || {
  echo "Error: File not found: $input" >&2
  exit 1
}

# Unless --force was given, leave files that already report a
# DateTimeOriginal value untouched. exiftool errors are ignored on purpose
# here: an unreadable tag is treated the same as a missing one.
if [[ "$force" == false ]]; then
  current_stamp=$(exiftool -s3 -DateTimeOriginal "$input" 2>/dev/null || true)
  if [[ -n "$current_stamp" ]]; then
    echo "Skipping: EXIF date already set ($current_stamp). Use -f to overwrite."
    exit 0
  fi
fi
# Work out the date to write and print it as YYYY-MM-DD.
#   $1 - date given with -d/--date (may be empty)
#   $2 - path of the image; its basename is searched when $1 is empty
# Returns 1 (with a message on stderr) when no date can be found or when the
# candidate is not a plausible calendar date.
resolve_date() {
  local manual="$1" path="$2"
  local name candidate

  if [[ -n "$manual" ]]; then
    candidate="$manual"
  else
    # Extract the first YYYY-MM-DD looking run from the filename.
    name=$(basename -- "$path")
    if [[ "$name" =~ ([0-9]{4}-[0-9]{2}-[0-9]{2}) ]]; then
      candidate="${BASH_REMATCH[1]}"
    else
      echo "Warning: No date found in filename: $name" >&2
      return 1
    fi
  fi

  # Shape alone is not enough: "2024-13-45" has the right digits but is not a
  # date. Month must be 01-12 and day 01-31 (month lengths are not checked).
  if [[ ! "$candidate" =~ ^[0-9]{4}-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])$ ]]; then
    echo "Error: Invalid date: $candidate (expected YYYY-MM-DD, month 01-12, day 01-31)" >&2
    return 1
  fi

  printf '%s\n' "$candidate"
}

# Determine date to use; a failure here ends the script under `set -e`.
date=$(resolve_date "$manual_date" "$input")
# Turn the -t/--time value into HH:MM:SS and print it.
#   $1 - "" (use the 12:00 default), "H"/"HH", or "H:MM"/"HH:MM"
# Returns 1 (with a message on stderr) for anything else, including hours
# above 23 or minutes above 59.
normalize_time() {
  local raw="$1"
  local hour minute

  if [[ -z "$raw" ]]; then
    printf '12:00:00\n'
    return 0
  fi

  if [[ "$raw" =~ ^([0-9]{1,2})(:([0-9]{2}))?$ ]]; then
    # Force base 10: a leading zero would otherwise make "08" and "09"
    # invalid octal numbers for both printf %d and shell arithmetic.
    hour=$((10#${BASH_REMATCH[1]}))
    minute="${BASH_REMATCH[3]:-00}"
    if (( hour <= 23 && 10#$minute <= 59 )); then
      printf '%02d:%s:00\n' "$hour" "$minute"
      return 0
    fi
  fi

  echo "Error: Invalid time format: $raw (expected HH or HH:MM)" >&2
  return 1
}

# Parse time; a failure here ends the script under `set -e`.
time_str=$(normalize_time "$manual_time")
# EXIF timestamps use colons in the date part: "YYYY:MM:DD HH:MM:SS".
printf -v exif_stamp '%s %s' "${date//-/:}" "${time_str}"

# Both tags receive the same timestamp; the file is rewritten in place.
tag_args=(
  -overwrite_original
  "-DateTimeOriginal=${exif_stamp}"
  "-CreateDate=${exif_stamp}"
)
exiftool "${tag_args[@]}" "$input"

echo "Set EXIF date to ${date} ${time_str} (Europe/Berlin) on: $input"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Adds GPS coordinates to images based on a location search term.

set -euo pipefail

# Print the usage text to stdout. Error paths redirect it to stderr themselves.
show_help() {
  # Script name without its directory; parameter expansion avoids forking
  # `basename` once per usage/example line.
  local prog="${0##*/}"

  cat <<EOF
Usage: ${prog} [OPTIONS] -l "LOCATION" <image>...

Geocodes a location search term and writes GPS coordinates to image EXIF data.
Uses OpenStreetMap Nominatim API for geocoding.

Options:
  -l, --location SEARCH  Location search term (prompts if not provided)
  -y, --yes              Skip confirmation prompt
  -f, --force            Overwrite existing GPS data
  -h, --help             Show this help message

Examples:
  ${prog} -l "Abraxas, Augsburg" photo.jpg
  ${prog} -l "Eiffel Tower, Paris" *.jpg
  ${prog} -y -l "Central Park, NYC" vacation/*.jpg
EOF
}
# Option defaults.
location=""
skip_confirm=false
force=false
inputs=()

# Abort with a readable message when a value-taking option is the last
# argument. Without this check, reading "$2" under `set -u` ends the script
# with a bare "unbound variable" error.
#   $1 - option as typed by the user
#   $2 - number of arguments left, including the option itself
require_value() {
  if (( $2 < 2 )); then
    echo "Error: Option $1 requires an argument" >&2
    exit 1
  fi
}

# Parse the command line into location, skip_confirm, force and the inputs
# array (every non-option argument, in order).
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      -l|--location)
        require_value "$1" "$#"
        location="$2"
        shift 2
        ;;
      -y|--yes)
        skip_confirm=true
        shift
        ;;
      -f|--force)
        force=true
        shift
        ;;
      -*)
        echo "Error: Unknown option: $1" >&2
        show_help >&2
        exit 1
        ;;
      *)
        inputs+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"
# Ask for the search term interactively when -l/--location was not given.
if [[ -z "$location" ]]; then
  # -r keeps backslashes in the typed text literal. `|| true` stops a closed
  # stdin (EOF) from ending the script silently under `set -e`; the empty
  # check below reports the problem instead.
  read -r -p "Enter location search term: " location || true
  if [[ -z "$location" ]]; then
    echo "Error: No location specified." >&2
    exit 1
  fi
fi

if [[ ${#inputs[@]} -eq 0 ]]; then
  echo "Error: No input files specified" >&2
  show_help >&2
  exit 1
fi

# Validate all inputs exist before doing any network request or file change.
for input in "${inputs[@]}"; do
  if [[ ! -f "$input" ]]; then
    echo "Error: File not found: $input" >&2
    exit 1
  fi
done
# Geocode the location using Nominatim
echo "Searching for: $location"

# Percent-encode the search term so it is safe inside the query string.
encoded_location=$(printf '%s' "$location" | jq -sRr @uri)

# -f turns HTTP error statuses into a non-zero exit and -S keeps curl's own
# error text visible although -s hides the progress meter. Without them a
# failed request either ended the script with no message at all or handed an
# error page to jq.
if ! response=$(curl -fsS \
  -H "User-Agent: geocode-image-script/1.0" \
  "https://nominatim.openstreetmap.org/search?q=${encoded_location}&format=json&limit=1"); then
  echo "Error: Geocoding request failed for: $location" >&2
  exit 1
fi

if [[ -z "$response" || "$response" == "[]" ]]; then
  echo "Error: Location not found: $location" >&2
  exit 1
fi

# Print field $1 of the first search result. Prints nothing when the field is
# missing or null, or when the response is not the expected JSON array, so the
# caller can report one clear error instead of a raw jq failure.
first_result_field() {
  jq -r --arg key "$1" '.[0][$key] // empty' <<<"$response" 2>/dev/null || true
}

lat=$(first_result_field lat)
lon=$(first_result_field lon)
display_name=$(first_result_field display_name)

# Both coordinates are required; the original check looked at lat only.
if [[ -z "$lat" || -z "$lon" ]]; then
  echo "Error: Could not parse coordinates from response" >&2
  exit 1
fi

echo ""
echo "Found location:"
echo "  $display_name"
echo "  Coordinates: $lat, $lon"
echo "  https://www.openstreetmap.org/?mlat=${lat}&mlon=${lon}&zoom=17"
echo ""
echo "Files to update: ${#inputs[@]}"
# Ask before touching any file, unless -y/--yes was given. A single keypress
# is read into REPLY; only "n" or "N" cancels, anything else proceeds.
if [[ "$skip_confirm" == false ]]; then
  read -r -n 1 -p "Apply GPS coordinates to ${#inputs[@]} file(s)? [Y/n] "
  echo
  case "$REPLY" in
    [Nn])
      echo "Aborted."
      exit 0
      ;;
  esac
fi
# Split a signed decimal coordinate into magnitude and hemisphere letter.
#   $1 - signed value as text, e.g. "-33.86"
#   $2 - letter to use when the value has no leading minus
#   $3 - letter to use when the value starts with a minus
# Prints "<magnitude> <letter>".
split_coordinate() {
  if [[ "$1" == -* ]]; then
    printf '%s %s\n' "${1#-}" "$3"
  else
    printf '%s %s\n' "$1" "$2"
  fi
}

# The coordinates are the same for every file, so derive the unsigned values
# and the N/S, E/W references once instead of on every loop iteration.
read -r gps_lat lat_ref <<<"$(split_coordinate "$lat" N S)"
read -r gps_lon lon_ref <<<"$(split_coordinate "$lon" E W)"

# Process each file
for input in "${inputs[@]}"; do
  # Check for existing GPS data. exiftool errors are ignored on purpose: an
  # unreadable tag is treated the same as a missing one.
  if [[ "$force" == false ]]; then
    existing_gps=$(exiftool -s3 -GPSLatitude "$input" 2>/dev/null || true)
    if [[ -n "$existing_gps" ]]; then
      echo "Skipping (has GPS data): $input. Use -f to overwrite."
      continue
    fi
  fi

  exiftool -overwrite_original \
    -GPSLatitude="$gps_lat" \
    -GPSLongitude="$gps_lon" \
    -GPSLatitudeRef="$lat_ref" \
    -GPSLongitudeRef="$lon_ref" \
    "$input" > /dev/null

  echo "Updated: $input"
done

echo "Done."
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# OCRs an image and writes the text to the EXIF description field.

set -euo pipefail

# Print the usage text to stdout. Error paths redirect it to stderr themselves.
show_help() {
  # Script name without its directory; parameter expansion avoids forking
  # `basename` once per usage/example line.
  local prog="${0##*/}"

  # Space-separated `tesseract --list-langs` output with its first line
  # dropped. Left empty when tesseract is missing or fails, so that asking for
  # help never aborts under `set -e`/pipefail.
  local langs
  langs=$(tesseract --list-langs 2>/dev/null | tail -n +2 | tr '\n' ' ') || langs=""

  cat <<EOF
Usage: ${prog} [OPTIONS] <image>...

OCRs images using Tesseract and writes extracted text to EXIF description.
Writes to ImageDescription (EXIF), Description (XMP), and Caption-Abstract (IPTC).

Options:
  -l, --language LANG  OCR language (default: deu+eng)
                       Available: ${langs}
  -p, --print          Print OCR text to stdout instead of writing to EXIF
  -f, --force          Overwrite existing description
  -h, --help           Show this help message

Examples:
  ${prog} scan.jpg             # OCR and write to description
  ${prog} -l eng document.jpg  # Use English only
  ${prog} -p image.jpg         # Just print OCR result
  ${prog} *.jpg                # Process multiple images
EOF
}
# Option defaults.
language="deu+eng"
print_only=false
force=false
inputs=()

# Abort with a readable message when a value-taking option is the last
# argument. Without this check, reading "$2" under `set -u` ends the script
# with a bare "unbound variable" error.
#   $1 - option as typed by the user
#   $2 - number of arguments left, including the option itself
require_value() {
  if (( $2 < 2 )); then
    echo "Error: Option $1 requires an argument" >&2
    exit 1
  fi
}

# Parse the command line into language, print_only, force and the inputs
# array (every non-option argument, in order).
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      -l|--language)
        require_value "$1" "$#"
        language="$2"
        shift 2
        ;;
      -p|--print)
        print_only=true
        shift
        ;;
      -f|--force)
        force=true
        shift
        ;;
      -*)
        echo "Error: Unknown option: $1" >&2
        show_help >&2
        exit 1
        ;;
      *)
        inputs+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"
# At least one image has to be named on the command line.
(( ${#inputs[@]} != 0 )) || {
  echo "Error: No input files specified" >&2
  show_help >&2
  exit 1
}

# Stop at the first path that is not an existing regular file, before any
# image is processed.
for candidate in "${inputs[@]}"; do
  [[ -f "$candidate" ]] && continue
  echo "Error: File not found: $candidate" >&2
  exit 1
done
# Collapse raw OCR output (stdin) into a single line on stdout: every run of
# whitespace (spaces, tabs, newlines, form feeds) becomes one space and the
# result has no leading or trailing space. Whitespace-only input yields an
# empty string, so blank pages are reported as "No text found." instead of
# being written as a one-space description.
normalize_ocr_text() {
  tr -s '[:space:]' ' ' | sed -e 's/^ //' -e 's/ $//'
}

# Process each file
for input in "${inputs[@]}"; do
  echo "Processing: $input"

  # Check for existing description (not needed when only printing). exiftool
  # errors are ignored on purpose: an unreadable tag counts as missing.
  if [[ "$force" == false && "$print_only" == false ]]; then
    existing=$(exiftool -s3 -ImageDescription "$input" 2>/dev/null || true)
    if [[ -n "$existing" ]]; then
      echo "  Skipping (has description). Use -f to overwrite."
      continue
    fi
  fi

  # Run OCR. tesseract's stderr is hidden, so when the pipeline fails (it does
  # when pipefail is active and tesseract exits non-zero) say so explicitly
  # rather than letting `set -e` end the script without any message.
  if ! ocr_text=$(tesseract "$input" stdout -l "$language" 2>/dev/null | normalize_ocr_text); then
    echo "  Error: OCR failed for: $input" >&2
    exit 1
  fi

  if [[ -z "$ocr_text" ]]; then
    echo "  No text found."
    continue
  fi

  if [[ "$print_only" == true ]]; then
    echo "$ocr_text"
  else
    # Write to multiple description fields for compatibility
    exiftool -overwrite_original \
      -ImageDescription="$ocr_text" \
      -Description="$ocr_text" \
      -Caption-Abstract="$ocr_text" \
      "$input" > /dev/null

    # Show at most 80 characters; add an ellipsis only when text was cut off.
    preview="${ocr_text:0:80}"
    if (( ${#ocr_text} > 80 )); then
      preview+="..."
    fi
    echo "  Written: ${preview}"
  fi
done

echo "Done."
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Stitches PDF pages and/or JPG images into a single JPG arranged in a grid.

set -euo pipefail

# Print the usage text to stdout. Error paths redirect it to stderr themselves.
show_help() {
  # Script name without its directory; parameter expansion avoids forking
  # `basename` once per usage/example line.
  local prog="${0##*/}"

  cat <<EOF
Usage: ${prog} [OPTIONS] <input>...

Stitches PDF pages and/or JPG images into a single JPG arranged in a grid.
Accepts multiple PDF and JPG files as input.

Options:
  -c, --columns N         Number of columns / images per row (default: 1)
  -q, --quality N         JPEG quality 1-100 (default: 80)
  -d, --dpi N             PDF rendering density (default: 300)
  -g, --gap N             Gap between images in pixels (default: 0)
  -b, --background COLOR  Background color for gaps (default: black)
  -r, --resize WxH        Resize each image before stitching (e.g., 800x600)
  -o, --output FILE       Custom output filename
  -h, --help              Show this help message

Output naming (when -o not specified):
  - Single PDF: replaces .pdf with .jpg
  - Multiple files or JPG input: appends " (stitched)" to avoid overwriting

Examples:
  ${prog} document.pdf                       # Stack PDF pages vertically
  ${prog} -c 2 document.pdf                  # 2 columns
  ${prog} -g 10 -b white img1.jpg img2.jpg   # 10px white gaps
  ${prog} -r 800x600 *.jpg                   # Resize all to 800x600
  ${prog} -o out.jpg a.pdf b.jpg             # Custom output name
EOF
}
# Option defaults (mirrored in the help text).
columns=1
quality=80
dpi=300
gap=0
background="black"
resize=""
output=""
inputs=()

# Abort with a readable message when a value-taking option is the last
# argument. Without this check, reading "$2" under `set -u` ends the script
# with a bare "unbound variable" error.
#   $1 - option as typed by the user
#   $2 - number of arguments left, including the option itself
require_value() {
  if (( $2 < 2 )); then
    echo "Error: Option $1 requires an argument" >&2
    exit 1
  fi
}

# Parse the command line into the option variables above and the inputs
# array (every non-option argument, in order). Option values are stored as
# given; they are not range-checked here.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      -c|--columns)
        require_value "$1" "$#"
        columns="$2"
        shift 2
        ;;
      -q|--quality)
        require_value "$1" "$#"
        quality="$2"
        shift 2
        ;;
      -d|--dpi)
        require_value "$1" "$#"
        dpi="$2"
        shift 2
        ;;
      -g|--gap)
        require_value "$1" "$#"
        gap="$2"
        shift 2
        ;;
      -b|--background)
        require_value "$1" "$#"
        background="$2"
        shift 2
        ;;
      -r|--resize)
        require_value "$1" "$#"
        resize="$2"
        shift 2
        ;;
      -o|--output)
        require_value "$1" "$#"
        output="$2"
        shift 2
        ;;
      -*)
        echo "Error: Unknown option: $1" >&2
        show_help >&2
        exit 1
        ;;
      *)
        inputs+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"
# At least one PDF/JPG has to be named on the command line.
(( ${#inputs[@]} != 0 )) || {
  echo "Error: No input files specified" >&2
  show_help >&2
  exit 1
}

# Stop at the first path that is not an existing regular file, before any
# conversion work starts.
for candidate in "${inputs[@]}"; do
  [[ -f "$candidate" ]] && continue
  echo "Error: File not found: $candidate" >&2
  exit 1
done
# Derive the output path from the first input when -o/--output was not given.
# The result sits next to that input and always ends in .jpg.
if [[ -z "$output" ]]; then
  lead="${inputs[0]}"
  lead_dir=$(dirname "$lead")
  lead_file=$(basename "$lead")
  stem="${lead_file%.*}"

  # " (stitched)" is inserted whenever writing "<stem>.jpg" could overwrite an
  # input: with several inputs, or with a single input that is itself a JPG.
  suffix=""
  if (( ${#inputs[@]} > 1 )); then
    suffix=" (stitched)"
  else
    lead_ext="${lead_file##*.}"
    case "${lead_ext,,}" in
      jpg|jpeg)
        suffix=" (stitched)"
        ;;
    esac
  fi

  output="${lead_dir}/${stem}${suffix}.jpg"
fi
# All intermediate page images live in a private temp dir that is removed on
# every exit path.
tmpdir=$(mktemp -d)
trap 'rm -rf -- "$tmpdir"' EXIT

# Print how many page-*.jpg files directory $1 holds.
# Uses a glob instead of parsing `ls | wc -l`: with pipefail active, `ls`
# finding nothing would fail the pipeline, and `wc -l` output may carry
# padding on some platforms.
count_staged_pages() {
  local -a staged=("$1"/page-*.jpg)
  # Without nullglob an unmatched pattern stays as one literal element, so
  # check that the first entry really exists.
  if [[ -e "${staged[0]}" ]]; then
    printf '%d\n' "${#staged[@]}"
  else
    printf '0\n'
  fi
}

# Process each input file. Files are staged as page-<counter>-<page>.jpg so
# that the lexical order of the names matches the order of the inputs.
page_counter=0
for input in "${inputs[@]}"; do
  ext="${input##*.}"
  ext_lower="${ext,,}"
  printf -v page_tag '%04d' "$page_counter"

  if [[ "$ext_lower" == "pdf" ]]; then
    # Convert PDF pages to images; the trailing %04d is passed through to
    # `convert` for it to number the pages.
    convert -density "$dpi" "$input" "$tmpdir/page-${page_tag}-%04d.jpg"
    # The counter continues at the total number of staged files.
    page_counter=$(count_staged_pages "$tmpdir")
  elif [[ "$ext_lower" == "jpg" || "$ext_lower" == "jpeg" ]]; then
    # Copy JPG to temp dir with sequential naming
    cp -- "$input" "$tmpdir/page-${page_tag}-0000.jpg"
    page_counter=$((page_counter + 1))
  else
    echo "Error: Unsupported file type: $input (expected PDF or JPG)" >&2
    exit 1
  fi
done
# Assemble the whole montage invocation as one array: staged pages first (the
# glob expands in name order), then layout and encoding options.
stitch_cmd=(
  montage
  "$tmpdir"/page-*.jpg
  -tile "${columns}x"
  -geometry "+${gap}+${gap}"
  -background "$background"
  -quality "$quality"
)

# -resize is passed only when the user asked for it.
[[ -z "$resize" ]] || stitch_cmd+=(-resize "$resize")

# The output path is always the final argument.
stitch_cmd+=("$output")

"${stitch_cmd[@]}"

echo "Created: $output"
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Workflow to upload memorabilia PDFs to Immich:
pdf2jpg-stitch.sh "2025-01-01 Invitationcard.pdf"
exifdate-from-filename.sh "2025-01-01 Invitationcard.jpg"
ocr-to-exif.sh "2025-01-01 Invitationcard.jpg"
geocode-image.sh "2025-01-01 Invitationcard.jpg" -l "Times Square, New York"