Skip to content

Instantly share code, notes, and snippets.

@iamajvillalobos
Created December 21, 2025 04:14
Show Gist options
  • Select an option

  • Save iamajvillalobos/72d14f88ba0d6eb1adbf54ff0122b797 to your computer and use it in GitHub Desktop.

Select an option

Save iamajvillalobos/72d14f88ba0d6eb1adbf54ff0122b797 to your computer and use it in GitHub Desktop.
#!/bin/bash
# summarize-book - Create a 15-minute book summary using Claude
# Usage: summarize-book <path-to-pdf> [output-name]
#
# The PDF is split into text chunks, each chunk is summarized separately,
# and the per-chunk summaries are then combined into one markdown file.

# Abort on the first unhandled command failure.
set -e
# Colors for output (escape sequences, interpreted by `echo -e` at print time)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Configuration
# Directory the final "<output-name>.md" summary is written to.
SUMMARY_DIR="$HOME/Personal/arq/book-summaries"
# Directory under which the per-run temporary chunk directory is created.
SCRIPT_DIR="$HOME/Personal/arq"
# Path of the per-run temporary directory; set by main, removed by cleanup.
TEMP_DIR=""
# Number of PDF pages extracted into each text chunk.
PAGES_PER_CHUNK=15
# Value passed to `claude --model` for every request.
MODEL="haiku" # Options: haiku, sonnet, opus
# Remove the temporary working directory, if one was created.
# Globals: TEMP_DIR (read), YELLOW, NC (read)
# Outputs: a progress line on stdout when something is removed
cleanup() {
  # Nothing to do until TEMP_DIR has been set and actually exists on disk.
  [[ -n "$TEMP_DIR" && -d "$TEMP_DIR" ]] || return 0

  echo -e "${YELLOW}Cleaning up temporary files...${NC}"
  rm -rf "$TEMP_DIR"
}
# Run cleanup whenever the shell exits, so the temporary directory is removed
# on both the success path and every `exit 1` error path.
trap cleanup EXIT
# Verify that every external tool the script relies on is available.
# Globals: RED, NC (read)
# Outputs: the list of missing tools with install hints on stdout
# Returns: 0 when everything is present; exits the script with 1 otherwise
check_deps() {
  local -a missing=()
  local entry tool

  # Each entry is "<command> (<install hint>)"; the command is the first word.
  for entry in \
    "pdftotext (install: brew install poppler)" \
    "pdfinfo (install: brew install poppler)" \
    "claude (install: npm install -g @anthropic-ai/claude-code)"; do
    tool="${entry%% *}"
    if ! command -v "$tool" > /dev/null 2>&1; then
      missing+=("$entry")
    fi
  done

  if [[ ${#missing[@]} -eq 0 ]]; then
    return 0
  fi

  echo -e "${RED}Missing dependencies:${NC}"
  printf ' - %s\n' "${missing[@]}"
  exit 1
}
# Print the command-line help text and terminate.
# Outputs: usage text on stdout
# Returns: never returns; exits the script with status 1
usage() {
  printf '%s\n' \
    "Usage: summarize-book <path-to-pdf> [output-name]" \
    "" \
    "Arguments:" \
    " path-to-pdf Path to the PDF book to summarize" \
    " output-name (Optional) Name for the summary file (without .md)" \
    "" \
    "Example:" \
    " summarize-book ~/Books/clean-code.pdf" \
    " summarize-book ~/Books/clean-code.pdf clean-code-summary"
  exit 1
}
# Split a PDF into plain-text chunks of PAGES_PER_CHUNK pages each.
# Globals:   TEMP_DIR, PAGES_PER_CHUNK, RED, GREEN, NC (read)
# Arguments: $1 - path to the PDF file
# Outputs:   progress lines on stdout, errors on stderr; writes
#            $TEMP_DIR/chunk_NN.txt files, each starting with a
#            "CHUNK n - Pages a to b" header
# Returns:   0 on success; exits the script with 1 when the page count
#            cannot be determined or a page range cannot be extracted
chunk_pdf() {
  local pdf_path="$1"
  local total_pages

  # Anchor on the real "Pages:" field: other metadata lines (e.g. a Title
  # containing "Pages:") must not be mistaken for the page count.
  total_pages=$(pdfinfo "$pdf_path" 2>/dev/null | awk '/^Pages:/ { print $2; exit }')
  if [[ ! "$total_pages" =~ ^[1-9][0-9]*$ ]]; then
    echo -e "${RED}Error: Could not determine page count${NC}" >&2
    exit 1
  fi

  echo -e "${GREEN}Processing $total_pages pages...${NC}"

  local chunk_num=1
  local start_page=1
  local end_page chunk_file
  while [[ $start_page -le $total_pages ]]; do
    end_page=$((start_page + PAGES_PER_CHUNK - 1))
    if [[ $end_page -gt $total_pages ]]; then
      end_page=$total_pages
    fi
    printf -v chunk_file '%s/chunk_%02d.txt' "$TEMP_DIR" "$chunk_num"
    echo " Extracting pages $start_page-$end_page -> chunk $chunk_num"

    # Write the header and the extracted text in one pass. The book text is
    # streamed verbatim ("-" sends pdftotext output to stdout) and never goes
    # through `echo -e`, so backslash sequences in the book are preserved.
    # pdftotext warnings about sloppy PDFs are deliberately discarded; a real
    # failure is reported below via its exit status.
    if ! {
      printf 'CHUNK %s - Pages %s to %s\n' "$chunk_num" "$start_page" "$end_page"
      printf '%s\n\n' "============================================================"
      pdftotext -f "$start_page" -l "$end_page" -layout "$pdf_path" - 2>/dev/null
    } > "$chunk_file"; then
      echo -e "${RED}Error: Failed to extract pages $start_page-$end_page${NC}" >&2
      exit 1
    fi

    start_page=$((end_page + 1))
    chunk_num=$((chunk_num + 1))
  done

  echo -e "${GREEN}Created $((chunk_num - 1)) chunks${NC}"
}
# Ask Claude for a bullet-point summary of one text chunk.
# Globals:   TEMP_DIR, MODEL (read)
# Arguments: $1 - chunk text file, $2 - chunk number, $3 - book title
# Outputs:   path of the written summary file on stdout (success only)
# Returns:   0 when a non-empty summary was written, 1 otherwise
summarize_chunk() {
  local source_txt="$1"
  local index="$2"
  local title="$3"

  local padded
  padded=$(printf '%02d' $index)
  local dest_md="$TEMP_DIR/summary_${padded}.md"

  local instructions="You are summarizing part of a book called '$title'.
Read this text and extract the key points, concepts, and actionable insights.
IMPORTANT: Output ONLY bullet points with the key information. No intro, no conclusion, just the essential points. Be concise but capture all important ideas."
  local request="$instructions
$(cat "$source_txt")"

  claude --model "$MODEL" --print "$request" > "$dest_md" 2>/dev/null
  local status=$?

  # A zero exit status with an empty file still counts as a failure.
  if [[ $status -ne 0 || ! -s "$dest_md" ]]; then
    return 1
  fi
  echo "$dest_md"
  return 0
}
# Generate the final summary using Claude (two-pass approach).
# Pass 1 summarizes every chunk file in $TEMP_DIR; pass 2 asks Claude to
# synthesize those per-chunk summaries into one markdown document.
# Globals:   TEMP_DIR, MODEL, RED, GREEN, YELLOW, NC (read)
# Arguments: $1 - book title, $2 - path of the markdown file to write
# Outputs:   progress on stdout, failures on stderr
# Returns:   0 when the summary was written, 1 on any failure (an existing
#            file at $2 is left untouched in that case)
generate_summary() {
  local book_name="$1"
  local output_file="$2"

  echo -e "${GREEN}Pass 1: Summarizing individual chunks...${NC}"

  # Index the chunk files by their number. Glob order is lexicographic
  # (chunk_100 sorts before chunk_11); iterating an indexed array walks the
  # indices numerically, which keeps the book in reading order.
  local -a ordered_chunks=()
  local chunk_file chunk_id
  for chunk_file in "$TEMP_DIR"/chunk_*.txt; do
    [[ -f "$chunk_file" ]] || continue # unmatched glob stays literal
    chunk_id="${chunk_file##*/chunk_}"
    chunk_id="${chunk_id%.txt}"
    [[ "$chunk_id" =~ ^[0-9]+$ ]] || continue
    ordered_chunks[10#$chunk_id]="$chunk_file" # 10# avoids octal for "08"
  done

  local total_chunks=${#ordered_chunks[@]}
  if [[ $total_chunks -eq 0 ]]; then
    echo -e "${RED}No text chunks found to summarize${NC}" >&2
    return 1
  fi

  # summarize_chunk prints the path of the file it wrote; capture it so the
  # summaries can be concatenated in exactly the order they were produced.
  local -a summary_files=()
  local summary_path
  local chunk_num=1
  for chunk_file in "${ordered_chunks[@]}"; do
    echo -e " Processing chunk $chunk_num of $total_chunks..."
    if ! summary_path=$(summarize_chunk "$chunk_file" "$chunk_num" "$book_name") \
      || [[ ! -s "$summary_path" ]]; then
      echo -e "${RED}Failed to summarize chunk $chunk_num${NC}" >&2
      return 1
    fi
    summary_files+=("$summary_path")
    chunk_num=$((chunk_num + 1))
  done

  echo -e "${GREEN}Pass 2: Combining into final summary...${NC}"
  echo -e "${YELLOW}(This may take a minute)${NC}"

  # Combine all chunk summaries into one file
  local combined="$TEMP_DIR/combined_summaries.txt"
  if ! cat "${summary_files[@]}" > "$combined"; then
    echo -e "${RED}Failed to generate summary${NC}" >&2
    return 1
  fi

  # Build the final prompt
  local prompt="You are creating a final book summary for '$book_name'.
Below are extracted key points from each section of the book. Synthesize these into a cohesive 15-minute summary.
Focus on:
1. Core concepts and principles
2. Key patterns and techniques
3. Practical takeaways that can be applied immediately
Structure with clear headings and bullet points. Remove redundancy and organize logically.
Keep it under 3000 words.
IMPORTANT: Output ONLY the raw markdown content. No conversational text or commentary.
Start with exactly this header:
# $book_name - 15-Minute Summary
**Read:** $(date '+%B %Y')
---
Then the organized summary.
Here are the extracted points to synthesize:
$(cat "$combined")"

  # Run Claude in print mode. The result goes to a draft file first so a
  # failed or empty response never clobbers a previously saved summary, and
  # the command is tested directly so the failure branch is reachable even
  # when the script runs under `set -e`.
  local draft_file="$TEMP_DIR/final_summary.md"
  if claude --model "$MODEL" --print "$prompt" > "$draft_file" \
    && [[ -s "$draft_file" ]] \
    && mv -f "$draft_file" "$output_file"; then
    echo -e "${GREEN}Summary saved to: $output_file${NC}"
    return 0
  fi

  echo -e "${RED}Failed to generate summary${NC}" >&2
  return 1
}
# Main: validate arguments, split the PDF into chunks and build the summary.
# Globals:   SUMMARY_DIR, SCRIPT_DIR, RED, GREEN, NC (read); TEMP_DIR (written)
# Arguments: $1 - path to the PDF book
#            $2 - (optional) output file name without ".md"; defaults to the
#                 PDF file name lower-cased with spaces replaced by dashes
# Outputs:   progress on stdout, validation errors on stderr
# Returns:   exits with 1 on invalid input or when a step fails
main() {
  if [[ $# -lt 1 ]]; then
    usage
  fi
  local pdf_path="$1"
  local output_name="${2:-}"

  # Validate PDF exists
  if [[ ! -f "$pdf_path" ]]; then
    echo -e "${RED}Error: File not found: $pdf_path${NC}" >&2
    exit 1
  fi
  # Accept the extension in any letter case (book.pdf, BOOK.PDF, ...).
  case "$pdf_path" in
    *.[pP][dD][fF]) ;;
    *)
      echo -e "${RED}Error: File must be a PDF${NC}" >&2
      exit 1
      ;;
  esac

  # Check dependencies
  check_deps

  # File name without directory and without the extension.
  local file_stem="${pdf_path##*/}"
  if [[ -n "${file_stem%.*}" ]]; then
    file_stem="${file_stem%.*}"
  fi

  # The book title is always derived from the file name: dashes and
  # underscores become spaces.
  local book_name="${file_stem//[-_]/ }"
  if [[ -z "$output_name" ]]; then
    output_name=$(printf '%s' "${file_stem// /-}" | tr '[:upper:]' '[:lower:]')
  fi
  local output_file="$SUMMARY_DIR/$output_name.md"

  # Create temp directory inside project (so Claude has access). mktemp
  # guarantees a fresh, empty directory; a PID-based name could silently
  # reuse a stale directory (and its old chunk files) left by a killed run.
  mkdir -p "$SCRIPT_DIR"
  if ! TEMP_DIR=$(mktemp -d "$SCRIPT_DIR/.tmp-chunks-XXXXXX"); then
    echo -e "${RED}Error: Could not create temporary directory in $SCRIPT_DIR${NC}" >&2
    exit 1
  fi

  echo -e "${GREEN}Summarizing: $book_name${NC}"
  echo ""

  # Create summary directory if needed
  mkdir -p "$SUMMARY_DIR"

  # Chunk the PDF
  chunk_pdf "$pdf_path"
  echo ""

  # Generate summary
  generate_summary "$book_name" "$output_file"
  echo ""

  echo -e "${GREEN}Done!${NC}"
  echo "View with: cat '$output_file'"
}
# Entry point: forward all command-line arguments to main unchanged.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment