Last active
December 19, 2025 10:10
-
-
Save jin-zhe/53a8f5783aafdc6afb91b0e1e86432c4 to your computer and use it in GitHub Desktop.
Finds large files under a given directory (optionally restricted to a given extension) using a configurable file-size limit (default: 10 MB). Useful for identifying files that should be tracked with Git LFS.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Strict mode: abort on command failure, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# --- Default Values ---
# Each of these may be overridden by the matching command-line flag.
OUTPUT_FILE='large_files_report.txt' # -o: report destination
SIZE_LIMIT_MB=10                     # -l: size threshold, in MB
FILE_EXT=''                          # -e: empty means "all files"
TARGET_DIR='.'                       # -d: directory to scan
# --- Help Function ---
#######################################
# Print the command-line synopsis and terminate the script.
# Every call site in this script is an error path whose own message is
# written to stderr, so the synopsis goes to stderr as well; stdout stays
# free of help text when the script's output is piped or captured.
# Arguments: none
# Outputs:   usage text on stderr
# Returns:   never returns; exits the shell with status 1
#
# NOTE(review): the text below calls -d "required", but the script
# defaults TARGET_DIR to "." - confirm which behaviour is intended.
#######################################
usage() {
  cat >&2 <<EOF
Usage: $0 -d <directory> [-e <extension>] [-l <limit_in_mb>] [-o <output_file>]
 -d Directory to scan (required)
 -e File extension to filter (optional, e.g., 'png', 'csv'). If omitted, checks all files.
 -l Size limit in MB (optional, default: 10)
 -o Output filename (optional, default: large_files_report.txt)
EOF
  exit 1
}
# --- Parse Arguments ---
# The leading ':' puts getopts in silent mode, so unknown options and
# missing arguments are reported by the case arms below instead of by
# getopts itself.
while getopts ":d:e:l:o:" opt; do
  case "${opt}" in
    d) TARGET_DIR="$OPTARG" ;;
    e) FILE_EXT="$OPTARG" ;;
    l) SIZE_LIMIT_MB="$OPTARG" ;;
    o) OUTPUT_FILE="$OPTARG" ;;
    \?)
      echo "Invalid option: -$OPTARG" >&2
      usage
      ;;
    :)
      echo "Option -$OPTARG requires an argument." >&2
      usage
      ;;
  esac
done

# --- Validation ---
if [[ -z "$TARGET_DIR" || ! -d "$TARGET_DIR" ]]; then
  echo "Error: Directory '$TARGET_DIR' does not exist or was not provided." >&2
  usage
fi

# The limit is fed into shell arithmetic below. Arithmetic expansion
# evaluates arbitrary expressions (including variable names and array
# subscripts), so anything other than plain decimal digits is rejected
# before it gets there.
if [[ ! "$SIZE_LIMIT_MB" =~ ^[0-9]+$ ]]; then
  echo "Error: Size limit '$SIZE_LIMIT_MB' must be a non-negative whole number of MB." >&2
  usage
fi

# Convert MB to Bytes. The 10# prefix forces base 10 so that values with
# a leading zero (e.g. "08") are not parsed as octal.
THRESHOLD=$((10#$SIZE_LIMIT_MB * 1024 * 1024))

echo "--- Configuration ---"
echo "Target Directory: $TARGET_DIR"
echo "File Filter: ${FILE_EXT:-All files}"
echo "Size Limit: ${SIZE_LIMIT_MB}MB ($THRESHOLD bytes)"
echo "Output File: $OUTPUT_FILE"
echo "---------------------"
# --- Step 1: Find Files ---
echo "Scanning for files..."

# Candidate paths are staged in a private temp file, one path per line.
TEMP_LIST=$(mktemp)
# Remove the list on every exit path, including aborts caused by
# `set -e`, so a failed run does not leave a stray file in the temp dir.
trap 'rm -f -- "$TEMP_LIST"' EXIT

if [[ -n "$FILE_EXT" ]]; then
  # Case-insensitive extension match. A leading dot is stripped so that
  # both "png" and ".png" select the same files.
  find "$TARGET_DIR" -type f -iname "*.${FILE_EXT#.}" > "$TEMP_LIST"
else
  # No filter: every regular file is a candidate.
  find "$TARGET_DIR" -type f > "$TEMP_LIST"
fi

# wc pads its output with spaces on some platforms; tr strips them.
TOTAL_FILES=$(wc -l < "$TEMP_LIST" | tr -d ' ')
if [[ "$TOTAL_FILES" -eq 0 ]]; then
  echo "No files found matching the criteria."
  exit 0 # the EXIT trap removes the temp list
fi
echo "Found $TOTAL_FILES candidate files. Checking sizes..."

# --- Step 2: Check Sizes with Progress Bar ---
# Start from an empty report file.
: > "$OUTPUT_FILE"
CURRENT=0
# Function to draw progress bar
#######################################
# Redraw a one-line text progress bar in place.
# Arguments:
#   $1 - number of items processed so far
#   $2 - total number of items
# Outputs:
#   Writes "\rProgress: [###...] P% (current/total)" to stdout with no
#   trailing newline; the leading carriage return makes successive calls
#   overwrite the same terminal line.
# Notes:
#   A total of 0 is drawn as 0% instead of raising a division-by-zero
#   error, and the percentage is clamped to 0..100 so the bar is always
#   exactly 40 characters wide. All working variables are local.
#######################################
draw_progress_bar() {
  local current=$1
  local total=$2
  local width=40
  local percent=0

  if ((total > 0)); then
    percent=$((current * 100 / total))
  fi
  if ((percent > 100)); then
    percent=100
  elif ((percent < 0)); then
    percent=0
  fi

  local filled=$((percent * width / 100))
  local empty=$((width - filled))

  # Build each segment as a run of spaces of the right length, then
  # substitute the display character for every space.
  local done_part todo_part
  printf -v done_part '%*s' "$filled" ''
  printf -v todo_part '%*s' "$empty" ''
  done_part=${done_part// /#}
  todo_part=${todo_part// /.}

  printf "\rProgress: [%s] %d%% (%d/%d)" \
    "${done_part}${todo_part}" "$percent" "$current" "$total"
}
# Determine stat command syntax once. The command is kept as an array so
# it can be expanded safely with quotes instead of relying on word
# splitting of a string.
if stat --version > /dev/null 2>&1; then
  # GNU stat (Linux)
  STAT_CMD=(stat -c%s)
else
  # BSD stat (macOS)
  STAT_CMD=(stat -f%z)
fi

while IFS= read -r file; do
  # Deliberately not ((CURRENT++)): a post-increment from 0 evaluates to
  # 0, which (( )) reports as exit status 1, and `set -e` would then
  # abort the script on the very first file.
  CURRENT=$((CURRENT + 1))

  # Update progress bar every 10 files or on the last file to speed up processing
  if ((CURRENT % 10 == 0 || CURRENT == TOTAL_FILES)); then
    draw_progress_bar "$CURRENT" "$TOTAL_FILES"
  fi

  # Skip if file was deleted during the process
  [[ -f "$file" ]] || continue

  # The file can still vanish between the test above and this call; in
  # that case skip it instead of letting `set -e` terminate the scan.
  size=$("${STAT_CMD[@]}" "$file") || continue

  if ((size > THRESHOLD)); then
    printf '%s\n' "$file" >> "$OUTPUT_FILE"
  fi
done < "$TEMP_LIST"

echo "" # Newline after progress bar
echo "Done! Report saved to: $OUTPUT_FILE"

# Cleanup (-f: do not fail if the list has already been removed)
rm -f -- "$TEMP_LIST"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment