Skip to content

Instantly share code, notes, and snippets.

@gregmuellegger
Created December 13, 2025 18:29
Show Gist options
  • Select an option

  • Save gregmuellegger/3699d8ffb26ea39fb617c6e153f1775f to your computer and use it in GitHub Desktop.

Select an option

Save gregmuellegger/3699d8ffb26ea39fb617c6e153f1775f to your computer and use it in GitHub Desktop.
Scripts to convert PDFs to JPG and to set the Exif date and an OCR-based description
#!/bin/bash
# Sets EXIF date on an image based on YYYY-MM-DD pattern in filename or manual date.
set -euo pipefail
# Prints the usage text for this script to stdout.
# The script name shown in the text is derived from $0.
show_help() {
  local script_name
  script_name=$(basename "$0")
  cat <<EOF
Usage: ${script_name} [OPTIONS] <image>
Sets EXIF date on an image file. Extracts date from YYYY-MM-DD pattern in
filename, or uses manually specified date. Skips if EXIF date already exists.
Options:
-d, --date YYYY-MM-DD Set date manually instead of extracting from filename
-t, --time TIME Set time as HH or HH:MM in 24h format (default: 12:00)
-f, --force Overwrite existing EXIF date
-h, --help Show this help message
Timezone: Europe/Berlin
Examples:
${script_name} 2024-05-18-vacation.jpg # Extract date, use 12:00
${script_name} -t 18 2024-05-18-vacation.jpg # Set time to 18:00
${script_name} -t 9:15 2024-05-18-vacation.jpg # Set time to 09:15
${script_name} -d 2024-05-18 photo.jpg # Set date manually
${script_name} -f 2024-05-18-photo.jpg # Overwrite existing date
EOF
}
# Option state, filled in by parse_args.
manual_date=""
manual_time=""
force=false

# Parses the command line into manual_date, manual_time, force and input.
# Arguments: the script's positional parameters ("$@").
# Globals written: manual_date, manual_time, force, input.
# Exits 0 after printing help, 1 on an unknown option or a missing option value.
# If several positional arguments are given, the last one wins.
parse_args() {
  while [[ $# -gt 0 ]]; do
    # Options that take a value must be followed by one; without this check
    # "$2" would abort with a cryptic "unbound variable" error under set -u.
    case "$1" in
      -d|--date|-t|--time)
        if [[ $# -lt 2 ]]; then
          echo "Error: Option $1 requires an argument" >&2
          exit 1
        fi
        ;;
    esac
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      -d|--date)
        manual_date="$2"
        shift 2
        ;;
      -t|--time)
        manual_time="$2"
        shift 2
        ;;
      -f|--force)
        force=true
        shift
        ;;
      -*)
        echo "Error: Unknown option: $1" >&2
        show_help >&2
        exit 1
        ;;
      *)
        input="$1"
        shift
        ;;
    esac
  done
}

parse_args "$@"
# An image path is mandatory: without one, print the usage text and fail.
[[ -n "${input:-}" ]] || {
  echo "Error: No input file specified" >&2
  show_help >&2
  exit 1
}

# The path must refer to an existing regular file.
[[ -f "$input" ]] || {
  echo "Error: File not found: $input" >&2
  exit 1
}
# Unless --force was given, leave files that already have DateTimeOriginal alone.
if [[ "$force" == false ]]; then
  # "|| :" keeps a failing exiftool call from aborting the script; whatever it
  # printed (possibly nothing) is still captured.
  existing_date=$(exiftool -s3 -DateTimeOriginal "$input" 2>/dev/null || :)
  [[ -z "$existing_date" ]] || {
    echo "Skipping: EXIF date already set ($existing_date). Use -f to overwrite."
    exit 0
  }
fi
# Determine the date to write: an explicit -d/--date value takes precedence,
# otherwise the first YYYY-MM-DD pattern found in the file name is used.
if [[ -n "$manual_date" ]]; then
  date="$manual_date"
else
  # Extract YYYY-MM-DD from filename
  filename=$(basename "$input")
  if [[ "$filename" =~ ([0-9]{4}-[0-9]{2}-[0-9]{2}) ]]; then
    date="${BASH_REMATCH[1]}"
  else
    # This path is fatal, so it is reported as an error rather than a warning.
    echo "Error: No date found in filename: $filename" >&2
    exit 1
  fi
fi

# Validate date format
if [[ ! "$date" =~ ^([0-9]{4})-([0-9]{2})-([0-9]{2})$ ]]; then
  echo "Error: Invalid date format: $date (expected YYYY-MM-DD)" >&2
  exit 1
fi

# Reject values such as 2024-13-40 that match the pattern but are not dates.
# The 10# prefix forces base 10 so that "08" and "09" are not read as octal.
month=$((10#${BASH_REMATCH[2]}))
day=$((10#${BASH_REMATCH[3]}))
if (( month < 1 || month > 12 || day < 1 || day > 31 )); then
  echo "Error: Invalid date: $date (month must be 01-12, day 01-31)" >&2
  exit 1
fi
# Converts a user-supplied time into HH:MM:SS.
# Arguments: $1 - time as "H", "HH", "H:MM" or "HH:MM" (24h clock)
# Outputs:   "HH:MM:00" on stdout
# Returns:   1 if the value is malformed or out of range (hour > 23, minute > 59)
normalize_time() {
  local raw=$1
  local hour minute=0
  if [[ ! "$raw" =~ ^([0-9]{1,2})(:([0-9]{2}))?$ ]]; then
    return 1
  fi
  # 10# forces base 10: a plain printf "%02d" "08" would treat the leading
  # zero as an octal prefix and fail for 08 and 09.
  hour=$((10#${BASH_REMATCH[1]}))
  if [[ -n "${BASH_REMATCH[3]:-}" ]]; then
    minute=$((10#${BASH_REMATCH[3]}))
  fi
  if (( hour > 23 || minute > 59 )); then
    return 1
  fi
  printf '%02d:%02d:00\n' "$hour" "$minute"
}

# Parse time
if [[ -n "$manual_time" ]]; then
  if ! time_str=$(normalize_time "$manual_time"); then
    echo "Error: Invalid time format: $manual_time (expected HH or HH:MM)" >&2
    exit 1
  fi
else
  # No -t/--time given: fall back to noon.
  time_str="12:00:00"
fi
# EXIF timestamps use colons in the date part: "YYYY:MM:DD HH:MM:SS".
printf -v exif_date '%s %s' "${date//-/:}" "$time_str"

# Write the same timestamp to both DateTimeOriginal and CreateDate, in place.
exif_args=(
  -overwrite_original
  -DateTimeOriginal="$exif_date"
  -CreateDate="$exif_date"
)
exiftool "${exif_args[@]}" "$input"
echo "Set EXIF date to ${date} ${time_str} (Europe/Berlin) on: $input"
#!/bin/bash
# Adds GPS coordinates to images based on a location search term.
set -euo pipefail
# Prints the usage text for this script to stdout.
# The script name shown in the text is derived from $0.
show_help() {
  local script_name
  script_name=$(basename "$0")
  cat <<EOF
Usage: ${script_name} [OPTIONS] -l "LOCATION" <image>...
Geocodes a location search term and writes GPS coordinates to image EXIF data.
Uses OpenStreetMap Nominatim API for geocoding.
Options:
-l, --location SEARCH Location search term (prompts if not provided)
-y, --yes Skip confirmation prompt
-f, --force Overwrite existing GPS data
-h, --help Show this help message
Examples:
${script_name} -l "Abraxas, Augsburg" photo.jpg
${script_name} -l "Eiffel Tower, Paris" *.jpg
${script_name} -y -l "Central Park, NYC" vacation/*.jpg
EOF
}
# Option state, filled in by parse_args.
location=""
skip_confirm=false
force=false
inputs=()

# Parses the command line into location, skip_confirm, force and inputs.
# Options and file names may appear in any order.
# Arguments: the script's positional parameters ("$@").
# Globals written: location, skip_confirm, force, inputs (appended to).
# Exits 0 after printing help, 1 on an unknown option or a missing option value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    # -l needs a value; without this check "$2" would abort with a cryptic
    # "unbound variable" error under set -u.
    case "$1" in
      -l|--location)
        if [[ $# -lt 2 ]]; then
          echo "Error: Option $1 requires an argument" >&2
          exit 1
        fi
        ;;
    esac
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      -l|--location)
        location="$2"
        shift 2
        ;;
      -y|--yes)
        skip_confirm=true
        shift
        ;;
      -f|--force)
        force=true
        shift
        ;;
      -*)
        echo "Error: Unknown option: $1" >&2
        show_help >&2
        exit 1
        ;;
      *)
        inputs+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"
# Ask interactively for the search term when -l/--location was not given.
if [[ -z "$location" ]]; then
  # -r keeps backslashes in the answer literal. "|| true" lets an EOF on stdin
  # (e.g. a non-interactive run) fall through to the explicit error below
  # instead of aborting silently under set -e.
  read -r -p "Enter location search term: " location || true
  if [[ -z "$location" ]]; then
    echo "Error: No location specified." >&2
    exit 1
  fi
fi
# At least one image is required; otherwise print the usage text and fail.
(( ${#inputs[@]} > 0 )) || {
  echo "Error: No input files specified" >&2
  show_help >&2
  exit 1
}

# Fail before doing any work if one of the given paths is not a regular file.
for input in "${inputs[@]}"; do
  [[ -f "$input" ]] && continue
  echo "Error: File not found: $input" >&2
  exit 1
done
# Geocode the location using Nominatim
echo "Searching for: $location"
# jq's @uri filter percent-encodes the search term for use in the query string.
encoded_location=$(printf '%s' "$location" | jq -sRr @uri)

# --fail makes HTTP errors a non-zero exit instead of returning the error page
# as if it were a result; --show-error keeps curl's own diagnostic visible;
# --max-time keeps a stalled connection from hanging the script.
if ! response=$(curl --silent --show-error --fail --max-time 30 \
  -H "User-Agent: geocode-image-script/1.0" \
  "https://nominatim.openstreetmap.org/search?q=${encoded_location}&format=json&limit=1"); then
  echo "Error: Geocoding request failed for: $location" >&2
  exit 1
fi

if [[ -z "$response" || "$response" == "[]" ]]; then
  echo "Error: Location not found: $location" >&2
  exit 1
fi

# "// empty" maps a missing/null field to an empty string, and "|| true" turns
# a jq failure (response is not the expected JSON array) into an empty value,
# so every malformed response ends up in the single check below.
lat=$(jq -r '.[0].lat // empty' <<<"$response" 2>/dev/null || true)
lon=$(jq -r '.[0].lon // empty' <<<"$response" 2>/dev/null || true)
display_name=$(jq -r '.[0].display_name // empty' <<<"$response" 2>/dev/null || true)

if [[ -z "$lat" || -z "$lon" ]]; then
  echo "Error: Could not parse coordinates from response" >&2
  exit 1
fi
# Show what was found, including a map link for a quick visual check.
printf '\n'
printf '%s\n' \
  "Found location:" \
  " $display_name" \
  " Coordinates: $lat, $lon" \
  " https://www.openstreetmap.org/?mlat=${lat}&mlon=${lon}&zoom=17"
printf '\n'
printf '%s\n' "Files to update: ${#inputs[@]}"

# Ask for confirmation unless -y/--yes was given. A single keypress is read;
# only "n" or "N" aborts, anything else (including Enter) proceeds.
if [[ "$skip_confirm" == false ]]; then
  read -r -n 1 -p "Apply GPS coordinates to ${#inputs[@]} file(s)? [Y/n] "
  echo
  case "$REPLY" in
    [Nn])
      echo "Aborted."
      exit 0
      ;;
  esac
fi
# Splits a signed decimal coordinate into a hemisphere letter and magnitude.
# Arguments: $1 - coordinate, e.g. "-33.8688"
#            $2 - reference letter for negative values (S or W)
#            $3 - reference letter for non-negative values (N or E)
# Outputs:   "<ref> <magnitude>" on stdout
split_coordinate() {
  local value=$1 negative_ref=$2 positive_ref=$3
  if [[ "$value" == -* ]]; then
    printf '%s %s\n' "$negative_ref" "${value#-}"
  else
    printf '%s %s\n' "$positive_ref" "$value"
  fi
}

# The coordinates are the same for every file, so derive the N/S and E/W
# references and the unsigned values once, before the loop.
read -r lat_ref abs_lat <<<"$(split_coordinate "$lat" S N)"
read -r lon_ref abs_lon <<<"$(split_coordinate "$lon" W E)"

# Process each file
for input in "${inputs[@]}"; do
  # Check for existing GPS data
  if [[ "$force" == false ]]; then
    # "|| true" keeps a failing exiftool read from aborting the whole run.
    existing_gps=$(exiftool -s3 -GPSLatitude "$input" 2>/dev/null || true)
    if [[ -n "$existing_gps" ]]; then
      echo "Skipping (has GPS data): $input. Use -f to overwrite."
      continue
    fi
  fi

  # Magnitude and hemisphere reference are written as separate tags.
  exiftool -overwrite_original \
    -GPSLatitude="$abs_lat" \
    -GPSLongitude="$abs_lon" \
    -GPSLatitudeRef="$lat_ref" \
    -GPSLongitudeRef="$lon_ref" \
    "$input" > /dev/null
  echo "Updated: $input"
done
echo "Done."
#!/bin/bash
# OCRs an image and writes the text to the EXIF description field.
set -euo pipefail
# Prints the usage text for this script to stdout.
# The script name shown in the text is derived from $0; the list of available
# OCR languages is queried from tesseract while the text is generated.
show_help() {
  local script_name
  script_name=$(basename "$0")
  cat <<EOF
Usage: ${script_name} [OPTIONS] <image>...
OCRs images using Tesseract and writes extracted text to EXIF description.
Writes to ImageDescription (EXIF), Description (XMP), and Caption-Abstract (IPTC).
Options:
-l, --language LANG OCR language (default: deu+eng)
Available: $(tesseract --list-langs 2>/dev/null | tail -n +2 | tr '\n' ' ')
-p, --print Print OCR text to stdout instead of writing to EXIF
-f, --force Overwrite existing description
-h, --help Show this help message
Examples:
${script_name} scan.jpg # OCR and write to description
${script_name} -l eng document.jpg # Use English only
${script_name} -p image.jpg # Just print OCR result
${script_name} *.jpg # Process multiple images
EOF
}
# Option state, filled in by parse_args.
language="deu+eng"
print_only=false
force=false
inputs=()

# Parses the command line into language, print_only, force and inputs.
# Arguments: the script's positional parameters ("$@").
# Globals written: language, print_only, force, inputs (appended to).
# Exits 0 after printing help, 1 on an unknown option or a missing option value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    # -l needs a value; without this check "$2" would abort with a cryptic
    # "unbound variable" error under set -u.
    case "$1" in
      -l|--language)
        if [[ $# -lt 2 ]]; then
          echo "Error: Option $1 requires an argument" >&2
          exit 1
        fi
        ;;
    esac
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      -l|--language)
        language="$2"
        shift 2
        ;;
      -p|--print)
        print_only=true
        shift
        ;;
      -f|--force)
        force=true
        shift
        ;;
      -*)
        echo "Error: Unknown option: $1" >&2
        show_help >&2
        exit 1
        ;;
      *)
        inputs+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"
# At least one image is required; otherwise print the usage text and fail.
(( ${#inputs[@]} > 0 )) || {
  echo "Error: No input files specified" >&2
  show_help >&2
  exit 1
}

# Fail before doing any work if one of the given paths is not a regular file.
for input in "${inputs[@]}"; do
  [[ -f "$input" ]] && continue
  echo "Error: File not found: $input" >&2
  exit 1
done
# Collapses OCR output into a single line of text.
# Reads stdin; every run of whitespace (spaces, tabs, newlines, form feeds)
# becomes one space, and leading/trailing whitespace is removed.
# Outputs: the normalized text on stdout, without a trailing newline.
normalize_ocr_text() {
  local text
  text=$(tr -s '[:space:]' ' ')
  # After squeezing, at most one space can remain at either end.
  text=${text# }
  text=${text% }
  printf '%s' "$text"
}

# Process each file
for input in "${inputs[@]}"; do
  echo "Processing: $input"

  # Check for existing description (irrelevant when only printing)
  if [[ "$force" == false && "$print_only" == false ]]; then
    # "|| true" keeps a failing exiftool read from aborting the whole run.
    existing=$(exiftool -s3 -ImageDescription "$input" 2>/dev/null || true)
    if [[ -n "$existing" ]]; then
      echo " Skipping (has description). Use -f to overwrite."
      continue
    fi
  fi

  # Run OCR. tesseract's stderr is discarded, so report a failure explicitly
  # rather than letting the script die without any message.
  if ! raw_text=$(tesseract "$input" stdout -l "$language" 2>/dev/null); then
    echo " Error: OCR failed for: $input" >&2
    exit 1
  fi
  ocr_text=$(printf '%s' "$raw_text" | normalize_ocr_text)

  if [[ -z "$ocr_text" ]]; then
    echo " No text found."
    continue
  fi

  if [[ "$print_only" == true ]]; then
    echo "$ocr_text"
  else
    # Write to multiple description fields for compatibility
    exiftool -overwrite_original \
      -ImageDescription="$ocr_text" \
      -Description="$ocr_text" \
      -Caption-Abstract="$ocr_text" \
      "$input" > /dev/null
    # Show a preview; mark it as truncated only when text was actually cut.
    preview=${ocr_text:0:80}
    if (( ${#ocr_text} > 80 )); then
      preview+="..."
    fi
    echo " Written: $preview"
  fi
done
echo "Done."
#!/bin/bash
# Stitches PDF pages and/or JPG images into a single JPG arranged in a grid.
set -euo pipefail
# Prints the usage text for this script to stdout.
# The script name shown in the text is derived from $0.
show_help() {
  local script_name
  script_name=$(basename "$0")
  cat <<EOF
Usage: ${script_name} [OPTIONS] <input>...
Stitches PDF pages and/or JPG images into a single JPG arranged in a grid.
Accepts multiple PDF and JPG files as input.
Options:
-c, --columns N Number of columns / images per row (default: 1)
-q, --quality N JPEG quality 1-100 (default: 80)
-d, --dpi N PDF rendering density (default: 300)
-g, --gap N Gap between images in pixels (default: 0)
-b, --background COLOR Background color for gaps (default: black)
-r, --resize WxH Resize each image before stitching (e.g., 800x600)
-o, --output FILE Custom output filename
-h, --help Show this help message
Output naming (when -o not specified):
- Single PDF: replaces .pdf with .jpg
- Multiple files or JPG input: appends " (stitched)" to avoid overwriting
Examples:
${script_name} document.pdf # Stack PDF pages vertically
${script_name} -c 2 document.pdf # 2 columns
${script_name} -g 10 -b white img1.jpg img2.jpg # 10px white gaps
${script_name} -r 800x600 *.jpg # Resize all to 800x600
${script_name} -o out.jpg a.pdf b.jpg # Custom output name
EOF
}
# Option defaults, overridden by parse_args.
columns=1
quality=80
dpi=300
gap=0
background="black"
resize=""
output=""
inputs=()

# Parses the command line into the option variables above and inputs.
# Arguments: the script's positional parameters ("$@").
# Globals written: columns, quality, dpi, gap, background, resize, output,
#                  inputs (appended to).
# Exits 0 after printing help, 1 on an unknown option or a missing option value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    # Every option listed here takes a value; without this check "$2" would
    # abort with a cryptic "unbound variable" error under set -u.
    case "$1" in
      -c|--columns|-q|--quality|-d|--dpi|-g|--gap|-b|--background|-r|--resize|-o|--output)
        if [[ $# -lt 2 ]]; then
          echo "Error: Option $1 requires an argument" >&2
          exit 1
        fi
        ;;
    esac
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      -c|--columns)
        columns="$2"
        shift 2
        ;;
      -q|--quality)
        quality="$2"
        shift 2
        ;;
      -d|--dpi)
        dpi="$2"
        shift 2
        ;;
      -g|--gap)
        gap="$2"
        shift 2
        ;;
      -b|--background)
        background="$2"
        shift 2
        ;;
      -r|--resize)
        resize="$2"
        shift 2
        ;;
      -o|--output)
        output="$2"
        shift 2
        ;;
      -*)
        echo "Error: Unknown option: $1" >&2
        show_help >&2
        exit 1
        ;;
      *)
        inputs+=("$1")
        shift
        ;;
    esac
  done
}

parse_args "$@"
# At least one input is required; otherwise print the usage text and fail.
(( ${#inputs[@]} > 0 )) || {
  echo "Error: No input files specified" >&2
  show_help >&2
  exit 1
}

# Fail before doing any work if one of the given paths is not a regular file.
for input in "${inputs[@]}"; do
  [[ -f "$input" ]] && continue
  echo "Error: File not found: $input" >&2
  exit 1
done
# Derive the output path from the first input when -o/--output was not given.
# The result is placed next to that input and always ends in ".jpg".
if [[ -z "$output" ]]; then
  src="${inputs[0]}"
  src_dir=$(dirname "$src")
  src_file=$(basename "$src")
  stem="${src_file%.*}"

  # A " (stitched)" marker is added whenever the plain name could collide with
  # an input: several inputs, or a single input that is itself a JPG.
  suffix=""
  if (( ${#inputs[@]} > 1 )); then
    suffix=" (stitched)"
  else
    src_ext="${src_file##*.}"
    case "${src_ext,,}" in
      jpg|jpeg)
        suffix=" (stitched)"
        ;;
    esac
  fi

  output="${src_dir}/${stem}${suffix}.jpg"
fi
# Scratch directory for the individual page images; removed on any exit.
tmpdir=$(mktemp -d)
trap 'rm -rf -- "$tmpdir"' EXIT

# Prints the number of page-*.jpg files in directory $1.
# The body is a subshell (parentheses) so that nullglob, which makes an
# unmatched pattern expand to nothing, does not leak into the rest of the script.
count_pages() (
  shopt -s nullglob
  pages=("$1"/page-*.jpg)
  echo "${#pages[@]}"
)

# Process each input file. Every page is stored as page-<seq>-<n>.jpg, where
# <seq> is the number of pages produced so far; the zero-padded prefix keeps
# the files in input order when they are later expanded by a glob.
page_counter=0
for input in "${inputs[@]}"; do
  ext="${input##*.}"
  ext_lower="${ext,,}"
  printf -v page_prefix '%04d' "$page_counter"
  if [[ "$ext_lower" == "pdf" ]]; then
    # Convert PDF pages to images; %04d is filled in by convert per page.
    convert -density "$dpi" "$input" "$tmpdir/page-${page_prefix}-%04d.jpg"
    # The PDF may have produced any number of pages, so recount the files.
    page_counter=$(count_pages "$tmpdir")
  elif [[ "$ext_lower" == "jpg" || "$ext_lower" == "jpeg" ]]; then
    # Copy JPG to temp dir with sequential naming
    cp -- "$input" "$tmpdir/page-${page_prefix}-0000.jpg"
    page_counter=$((page_counter + 1))
  else
    echo "Error: Unsupported file type: $input (expected PDF or JPG)" >&2
    exit 1
  fi
done
# Assemble the montage command line: page images first (the glob expands in
# sorted order), then the layout options, and the output file last.
montage_args=(
  "$tmpdir"/page-*.jpg
  -tile "${columns}x"
  -geometry "+${gap}+${gap}"
  -background "$background"
  -quality "$quality"
)
# -resize is only passed when the user asked for it.
[[ -z "$resize" ]] || montage_args+=(-resize "$resize")
montage_args+=("$output")

montage "${montage_args[@]}"
echo "Created: $output"
@gregmuellegger
Copy link
Author

gregmuellegger commented Dec 13, 2025

Workflow to upload memorabilia PDFs to Immich:

  • Convert from PDF to JPG with pdf2jpg-stitch.sh "2025-01-01 Invitationcard.pdf"
  • Then set metadata as appropriate:
    • Set date from filename (by convention I usually prefix the filename with YYYY-MM-DD): exifdate-from-filename.sh "2025-01-01 Invitationcard.jpg"
    • Extract the text with OCR to the Exif description: ocr-to-exif.sh "2025-01-01 Invitationcard.jpg"
    • Add geolocation using the free OSM API: geocode-image.sh "2025-01-01 Invitationcard.jpg" -l "Times Square, New York"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment