Created
December 21, 2025 04:14
-
-
Save iamajvillalobos/72d14f88ba0d6eb1adbf54ff0122b797 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# summarize-book - Create a 15-minute book summary using Claude
#
# Usage: summarize-book path/to/book.pdf [output-name]
#
# Optional environment overrides (the defaults are the values below):
#   SUMMARIZE_BOOK_SUMMARY_DIR      directory that receives the finished summary
#   SUMMARIZE_BOOK_WORK_DIR         directory in which the scratch dir is created
#   SUMMARIZE_BOOK_PAGES_PER_CHUNK  number of PDF pages per extracted chunk
#   SUMMARIZE_BOOK_MODEL            model name passed to `claude --model`

set -e

# Colors for output. These are literal backslash sequences; they only become
# terminal escapes when printed through `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Configuration
SUMMARY_DIR="${SUMMARIZE_BOOK_SUMMARY_DIR:-$HOME/Personal/arq/book-summaries}"
SCRIPT_DIR="${SUMMARIZE_BOOK_WORK_DIR:-$HOME/Personal/arq}"
TEMP_DIR="" # Assigned in main(); left empty so cleanup() is a no-op until then
PAGES_PER_CHUNK="${SUMMARIZE_BOOK_PAGES_PER_CHUNK:-15}"
MODEL="${SUMMARIZE_BOOK_MODEL:-haiku}" # Options: haiku, sonnet, opus

# The chunking loop advances by PAGES_PER_CHUNK, so anything other than a
# positive integer (0 in particular) would never terminate.
if ! [[ "$PAGES_PER_CHUNK" =~ ^[1-9][0-9]*$ ]]; then
  echo "Warning: invalid pages-per-chunk '$PAGES_PER_CHUNK'; using 15" >&2
  PAGES_PER_CHUNK=15
fi
#######################################
# Remove the scratch directory, if one has been created.
# Globals:   TEMP_DIR (read), YELLOW, NC (read)
# Outputs:   a one-line notice on stdout when something is removed
# Returns:   0; does nothing while TEMP_DIR is empty or not a directory
#######################################
cleanup() {
  if [[ -n "$TEMP_DIR" && -d "$TEMP_DIR" ]]; then
    echo -e "${YELLOW}Cleaning up temporary files...${NC}"
    # `--` keeps the path from ever being parsed as an option.
    rm -rf -- "$TEMP_DIR"
  fi
}

# Run cleanup whenever the shell exits.
trap cleanup EXIT
#######################################
# Verify that every external tool the script calls is available.
# Globals:   RED, NC (read)
# Outputs:   on failure, the list of missing tools with install hints (stderr)
# Returns:   0 when all tools are found; otherwise exits the shell with 1
#######################################
check_deps() {
  local missing=()
  local spec tool dep

  # Each entry is "<command>|<install hint>".
  for spec in \
    "pdftotext|brew install poppler" \
    "pdfinfo|brew install poppler" \
    "claude|npm install -g @anthropic-ai/claude-code"; do
    tool=${spec%%|*}
    if ! command -v "$tool" &> /dev/null; then
      missing+=("$tool (install: ${spec#*|})")
    fi
  done

  if [[ ${#missing[@]} -gt 0 ]]; then
    echo -e "${RED}Missing dependencies:${NC}" >&2
    for dep in "${missing[@]}"; do
      echo " - $dep" >&2
    done
    exit 1
  fi
}
#######################################
# Print the command-line help and terminate.
# Outputs:   usage text on stderr (this is only reached on a usage error)
# Returns:   never returns; exits the shell with status 1
#######################################
usage() {
  # Quoted delimiter: the text is emitted literally, with no expansion.
  cat >&2 << 'EOF'
Usage: summarize-book <path-to-pdf> [output-name]

Arguments:
 path-to-pdf Path to the PDF book to summarize
 output-name (Optional) Name for the summary file (without .md)

Example:
 summarize-book ~/Books/clean-code.pdf
 summarize-book ~/Books/clean-code.pdf clean-code-summary
EOF
  exit 1
}
#######################################
# Extract the text of a PDF into fixed-size page chunks.
# Each chunk is written to $TEMP_DIR/chunk_NNNN.txt, prefixed with a header
# naming the chunk number and its page range.
# Globals:   TEMP_DIR, PAGES_PER_CHUNK, RED, GREEN, NC (read)
# Arguments: $1 - path to the PDF
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success; exits the shell with 1 when the page count cannot
#            be read or a page range cannot be extracted
#######################################
chunk_pdf() {
  local pdf_path="$1"
  local total_pages chunk_file content end_page
  local chunk_num=1
  local start_page=1
  local rule="============================================================"

  # Anchor on the start of the line so that other metadata fields which merely
  # contain the text "Pages:" (a title, for instance) are not picked up.
  total_pages=$(pdfinfo "$pdf_path" 2> /dev/null | awk '/^Pages:/ { print $2; exit }')
  if ! [[ "$total_pages" =~ ^[0-9]+$ ]]; then
    echo -e "${RED}Error: Could not determine page count${NC}" >&2
    exit 1
  fi

  echo -e "${GREEN}Processing $total_pages pages...${NC}"

  while (( start_page <= total_pages )); do
    end_page=$(( start_page + PAGES_PER_CHUNK - 1 ))
    if (( end_page > total_pages )); then
      end_page=$total_pages
    fi

    # Four digits keep the glob order equal to the numeric order well past
    # 99 chunks (two digits would sort chunk_100 before chunk_11).
    chunk_file="$TEMP_DIR/chunk_$(printf '%04d' "$chunk_num").txt"
    echo " Extracting pages $start_page-$end_page -> chunk $chunk_num"

    # Extract text for this page range
    if ! pdftotext -f "$start_page" -l "$end_page" -layout "$pdf_path" "$chunk_file" 2> /dev/null; then
      echo -e "${RED}Error: Could not extract pages $start_page-$end_page${NC}" >&2
      exit 1
    fi

    # Prepend the header. The text goes through printf '%s' so that backslash
    # sequences in the book (code samples, Windows paths, ...) are kept
    # verbatim instead of being interpreted as escapes.
    content=$(< "$chunk_file")
    printf 'CHUNK %s - Pages %s to %s\n%s\n\n%s\n' \
      "$chunk_num" "$start_page" "$end_page" "$rule" "$content" > "$chunk_file"

    start_page=$(( end_page + 1 ))
    chunk_num=$(( chunk_num + 1 ))
  done

  echo -e "${GREEN}Created $((chunk_num - 1)) chunks${NC}"
}
#######################################
# Summarize a single chunk with Claude.
# Globals:   TEMP_DIR, MODEL (read)
# Arguments: $1 - path to the chunk text file
#            $2 - chunk number (used to name the summary file)
#            $3 - book name, quoted in the prompt
# Outputs:   the path of the summary file on stdout when it succeeds;
#            whatever Claude wrote to stderr when it fails
# Returns:   0 if Claude succeeded and produced non-empty output, 1 otherwise
#######################################
summarize_chunk() {
  local chunk_file="$1"
  local chunk_num="$2"
  local book_name="$3"
  local padded summary_file err_file chunk_text prompt

  # Four digits keep summary_*.md in numeric order beyond 99 chunks.
  padded=$(printf '%04d' "$chunk_num") || return 1
  summary_file="$TEMP_DIR/summary_$padded.md"
  err_file="$TEMP_DIR/summary_$padded.err"

  # Refuse to send a prompt that has no book text in it.
  if ! chunk_text=$(cat "$chunk_file"); then
    return 1
  fi

  prompt="You are summarizing part of a book called '$book_name'.
Read this text and extract the key points, concepts, and actionable insights.
IMPORTANT: Output ONLY bullet points with the key information. No intro, no conclusion, just the essential points. Be concise but capture all important ideas.
$chunk_text"

  if claude --model "$MODEL" --print "$prompt" > "$summary_file" 2> "$err_file" \
    && [[ -s "$summary_file" ]]; then
    echo "$summary_file"
    return 0
  fi

  # Surface the tool's own diagnostics so a failure can be investigated.
  if [[ -s "$err_file" ]]; then
    cat "$err_file" >&2
  fi
  return 1
}
#######################################
# Generate the final summary with Claude (two-pass approach).
# Pass 1 summarizes every $TEMP_DIR/chunk_*.txt via summarize_chunk; pass 2
# asks Claude to merge those per-chunk summaries into one document.
# Globals:   TEMP_DIR, MODEL, RED, GREEN, YELLOW, NC (read)
# Arguments: $1 - book name used in the prompt and in the summary heading
#            $2 - path of the summary file to write
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success, 1 on any failure; $2 is only replaced on success
#######################################
generate_summary() {
  local book_name="$1"
  local output_file="$2"
  local chunk_files=("$TEMP_DIR"/chunk_*.txt)
  local total_chunks=${#chunk_files[@]}
  local chunk_num=1
  local chunk_file combined draft prompt

  # An unmatched glob stays as the literal pattern, which is not a real file.
  if [[ ! -e "${chunk_files[0]}" ]]; then
    echo -e "${RED}Failed to generate summary${NC}" >&2
    return 1
  fi

  echo -e "${GREEN}Pass 1: Summarizing individual chunks...${NC}"
  for chunk_file in "${chunk_files[@]}"; do
    echo -e " Processing chunk $chunk_num of $total_chunks..."
    # summarize_chunk prints the summary path; it is not needed here and would
    # only clutter the progress output.
    if ! summarize_chunk "$chunk_file" "$chunk_num" "$book_name" > /dev/null; then
      echo -e "${RED}Failed to summarize chunk $chunk_num${NC}" >&2
      return 1
    fi
    chunk_num=$((chunk_num + 1))
  done

  echo -e "${GREEN}Pass 2: Combining into final summary...${NC}"
  echo -e "${YELLOW}(This may take a minute)${NC}"

  # Combine all chunk summaries into one file
  combined="$TEMP_DIR/combined_summaries.txt"
  if ! cat "$TEMP_DIR"/summary_*.md > "$combined"; then
    echo -e "${RED}Failed to generate summary${NC}" >&2
    return 1
  fi

  # Build the final prompt
  prompt="You are creating a final book summary for '$book_name'.
Below are extracted key points from each section of the book. Synthesize these into a cohesive 15-minute summary.
Focus on:
1. Core concepts and principles
2. Key patterns and techniques
3. Practical takeaways that can be applied immediately
Structure with clear headings and bullet points. Remove redundancy and organize logically.
Keep it under 3000 words.
IMPORTANT: Output ONLY the raw markdown content. No conversational text or commentary.
Start with exactly this header:
# $book_name - 15-Minute Summary
**Read:** $(date '+%B %Y')
---
Then the organized summary.
Here are the extracted points to synthesize:
$(cat "$combined")"

  # Run Claude in print mode. The result is staged in the scratch directory
  # and moved into place only once it is known to be good, so a failed run
  # never truncates a summary that already exists.
  draft="$TEMP_DIR/final_summary.md"
  if claude --model "$MODEL" --print "$prompt" > "$draft" \
    && [[ -s "$draft" ]] \
    && mv -f -- "$draft" "$output_file"; then
    echo -e "${GREEN}Summary saved to: $output_file${NC}"
    return 0
  fi

  echo -e "${RED}Failed to generate summary${NC}" >&2
  return 1
}
#######################################
# Entry point: validate the arguments, chunk the PDF and build the summary.
# Globals:   SUMMARY_DIR, SCRIPT_DIR, RED, GREEN, NC (read); TEMP_DIR (written)
# Arguments: $1 - path to the PDF (extension matched case-insensitively)
#            $2 - optional output name (without .md); derived from the file
#                 name when omitted
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 on success; exits the shell with 1 on invalid input
#######################################
main() {
  if [[ $# -lt 1 ]]; then
    usage
  fi

  local pdf_path="$1"
  local output_name="${2:-}"
  local file_stem book_name output_file

  # Validate PDF exists
  if [[ ! -f "$pdf_path" ]]; then
    echo -e "${RED}Error: File not found: $pdf_path${NC}" >&2
    exit 1
  fi
  # Accept .pdf in any letter case (.PDF, .Pdf, ...).
  if [[ "$pdf_path" != *.[pP][dD][fF] ]]; then
    echo -e "${RED}Error: File must be a PDF${NC}" >&2
    exit 1
  fi

  # Check dependencies
  check_deps

  # The book name is always derived from the file name: strip the directory
  # and the extension, then turn dashes and underscores into spaces.
  file_stem=${pdf_path##*/}
  file_stem=${file_stem%.[pP][dD][fF]}
  book_name=${file_stem//[-_]/ }

  # Default output name: the file name with spaces dashed, lower-cased.
  if [[ -z "$output_name" ]]; then
    output_name=$(printf '%s' "${file_stem// /-}" | tr '[:upper:]' '[:lower:]')
  fi
  output_file="$SUMMARY_DIR/$output_name.md"

  # Create temp directory inside project (so Claude has access). mktemp
  # guarantees a fresh directory, so chunk files left behind by an earlier
  # run can never be mixed into this one.
  mkdir -p "$SCRIPT_DIR"
  if ! TEMP_DIR=$(mktemp -d "$SCRIPT_DIR/.tmp-chunks-XXXXXX"); then
    TEMP_DIR=""
    echo -e "${RED}Error: Could not create temporary directory${NC}" >&2
    exit 1
  fi

  echo -e "${GREEN}Summarizing: $book_name${NC}"
  echo ""

  # Create summary directory if needed
  mkdir -p "$SUMMARY_DIR"

  # Chunk the PDF
  chunk_pdf "$pdf_path"
  echo ""

  # Generate summary
  generate_summary "$book_name" "$output_file"
  echo ""

  echo -e "${GREEN}Done!${NC}"
  echo "View with: cat '$output_file'"
}
# Run the script, forwarding every command-line argument unchanged.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment