iamajvillalobos · December 21, 2025 04:14
diff --git a/summarize-book.sh b/summarize-book.sh
 #!/bin/bash

 # summarize-book - Create a 15-minute book summary using Claude
 # Usage: summarize-book <path-to-pdf> [output-name]
 #
 # Needs pdftotext, pdfinfo and the claude CLI on PATH (verified by check_deps).

 # Abort on the first unhandled command failure.
 set -e

 # ANSI color escape sequences for terminal output; they are stored as literal
 # text here and only turn into colors when printed with `echo -e`.
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m' # No Color

 # Configuration
 #   SUMMARY_DIR     - directory the final <output-name>.md summary is written to
 #   SCRIPT_DIR      - parent directory of the per-run temporary chunk directory
 #   TEMP_DIR        - per-run temporary directory; assigned in main, removed by cleanup
 #   PAGES_PER_CHUNK - number of PDF pages extracted into each text chunk
 #   MODEL           - model name handed to `claude --model`
 SUMMARY_DIR="$HOME/Personal/arq/book-summaries"
 SCRIPT_DIR="$HOME/Personal/arq"
 TEMP_DIR=""
 PAGES_PER_CHUNK=15
 MODEL="haiku"  # Options: haiku, sonnet, opus

 # Remove the temporary working directory, if one was created.
 # Globals: TEMP_DIR, YELLOW, NC (read)
 # Does nothing when TEMP_DIR is empty or no longer points at a directory.
 cleanup() {
     [[ -z "$TEMP_DIR" ]] && return 0
     [[ -d "$TEMP_DIR" ]] || return 0

     echo -e "${YELLOW}Cleaning up temporary files...${NC}"
     rm -rf "$TEMP_DIR"
 }

 # Register cleanup on the EXIT trap so the temporary directory is removed
 # whenever the script terminates, whether it finished or bailed out early.
 trap cleanup EXIT

 # Check dependencies
 # Verifies that every external tool the script shells out to can be found.
 # Globals: RED, NC (read)
 # Outputs: on failure, a heading plus one line per missing tool (with an
 #          install hint) on stderr; nothing on success
 # Returns: 0 when all tools are present; otherwise exits the script with 1
 check_deps() {
     local missing=()
     local tool dep

     for tool in pdftotext pdfinfo claude; do
         command -v "$tool" &> /dev/null && continue
         case "$tool" in
             claude)
                 missing+=("claude (install: npm install -g @anthropic-ai/claude-code)")
                 ;;
             *)
                 # pdftotext and pdfinfo both ship with poppler.
                 missing+=("$tool (install: brew install poppler)")
                 ;;
         esac
     done

     if [[ ${#missing[@]} -gt 0 ]]; then
         # Diagnostics belong on stderr so they survive stdout redirection.
         echo -e "${RED}Missing dependencies:${NC}" >&2
         for dep in "${missing[@]}"; do
             printf '  - %s\n' "$dep" >&2
         done
         exit 1
     fi
 }

 # Print the command-line help text and terminate.
 # Outputs: the help text on stdout
 # Returns: never returns; always exits with status 1
 usage() {
     local help_lines=(
         "Usage: summarize-book <path-to-pdf> [output-name]"
         ""
         "Arguments:"
         "  path-to-pdf   Path to the PDF book to summarize"
         "  output-name   (Optional) Name for the summary file (without .md)"
         ""
         "Example:"
         "  summarize-book ~/Books/clean-code.pdf"
         "  summarize-book ~/Books/clean-code.pdf clean-code-summary"
     )

     printf '%s\n' "${help_lines[@]}"
     exit 1
 }

 # Extract text from PDF in chunks
 # Globals:   TEMP_DIR, PAGES_PER_CHUNK, RED, GREEN, NC (read)
 # Arguments: $1 - path to the PDF
 # Outputs:   progress on stdout, errors on stderr; writes one
 #            $TEMP_DIR/chunk_NN.txt per page range, each starting with a
 #            "CHUNK n - Pages a to b" header followed by the extracted text
 # Returns:   0 on success; exits the script with status 1 when the page
 #            count cannot be read or a page range cannot be extracted
 chunk_pdf() {
     local pdf_path="$1"
     local total_pages

     # Compare the field name itself: a plain grep for "Pages:" would also hit
     # other pdfinfo fields whose value contains that text (e.g. the title).
     total_pages=$(pdfinfo "$pdf_path" 2>/dev/null | awk '$1 == "Pages:" { n = $2 } END { print n }')

     # Anything but a plain number would break the arithmetic below.
     if [[ ! "$total_pages" =~ ^[0-9]+$ ]]; then
         echo -e "${RED}Error: Could not determine page count${NC}" >&2
         exit 1
     fi

     echo -e "${GREEN}Processing $total_pages pages...${NC}"

     local chunk_num=1
     local start_page=1
     local end_page chunk_file content

     while [[ $start_page -le $total_pages ]]; do
         end_page=$((start_page + PAGES_PER_CHUNK - 1))
         if [[ $end_page -gt $total_pages ]]; then
             end_page=$total_pages
         fi

         printf -v chunk_file '%s/chunk_%02d.txt' "$TEMP_DIR" "$chunk_num"

         echo "  Extracting pages $start_page-$end_page -> chunk $chunk_num"

         # stderr is silenced, so report the failure ourselves instead of
         # letting `set -e` end the script without any explanation.
         if ! pdftotext -f "$start_page" -l "$end_page" -layout "$pdf_path" "$chunk_file" 2>/dev/null; then
             echo -e "${RED}Error: Failed to extract pages $start_page-$end_page${NC}" >&2
             exit 1
         fi

         # Prepend the header. The book text goes through '%s' so it is written
         # verbatim; `echo -e` would expand backslash sequences found in the
         # text (and a "\c" would cut the chunk short).
         content=$(< "$chunk_file")
         printf 'CHUNK %s - Pages %s to %s\n%s\n\n%s\n' \
             "$chunk_num" "$start_page" "$end_page" \
             "============================================================" \
             "$content" > "$chunk_file"

         start_page=$((end_page + 1))
         chunk_num=$((chunk_num + 1))
     done

     echo -e "${GREEN}Created $((chunk_num - 1)) chunks${NC}"
 }

 # Ask Claude for a bullet-point digest of one chunk of extracted text.
 # Globals:   TEMP_DIR, MODEL (read)
 # Arguments: $1 - chunk text file, $2 - chunk number, $3 - book title
 # Outputs:   writes $TEMP_DIR/summary_NN.md and prints that path on stdout
 # Returns:   0 when claude succeeded and produced a non-empty file, else 1
 summarize_chunk() {
     local chunk_file="$1" chunk_num="$2" book_name="$3"
     local out_path="$TEMP_DIR/summary_$(printf '%02d' $chunk_num).md"

     # The chunk text is appended to the instructions as a single argument.
     local request="You are summarizing part of a book called '$book_name'.

 Read this text and extract the key points, concepts, and actionable insights.

 IMPORTANT: Output ONLY bullet points with the key information. No intro, no conclusion, just the essential points. Be concise but capture all important ideas.

 $(cat "$chunk_file")"

     claude --model "$MODEL" --print "$request" > "$out_path" 2>/dev/null
     local status=$?

     # Both conditions are required: a clean exit and actual content.
     if [[ $status -ne 0 || ! -s "$out_path" ]]; then
         return 1
     fi

     echo "$out_path"
     return 0
 }

 # Generate summary using Claude (two-pass approach)
 # Pass 1 condenses every chunk via summarize_chunk; pass 2 asks Claude to
 # merge those digests into the final document.
 # Globals:   TEMP_DIR, MODEL, RED, GREEN, YELLOW, NC (read)
 # Arguments: $1 - book title, $2 - path of the final summary file
 # Outputs:   progress on stdout, errors on stderr
 # Returns:   0 on success, 1 on any failure; $2 is only replaced on success
 generate_summary() {
     local book_name="$1"
     local output_file="$2"

     echo -e "${GREEN}Pass 1: Summarizing individual chunks...${NC}"

     local chunk_files=("$TEMP_DIR"/chunk_*.txt)
     # An unmatched glob is left as the literal pattern; treat it as "nothing
     # to do" rather than handing a non-existent file to summarize_chunk.
     if [[ ! -e "${chunk_files[0]}" ]]; then
         echo -e "${RED}No chunks found to summarize${NC}" >&2
         return 1
     fi

     local total_chunks=${#chunk_files[@]}
     local chunk_num=1
     local chunk_file

     for chunk_file in "${chunk_files[@]}"; do
         echo -e "  Processing chunk $chunk_num of $total_chunks..."
         if ! summarize_chunk "$chunk_file" "$chunk_num" "$book_name"; then
             echo -e "${RED}Failed to summarize chunk $chunk_num${NC}" >&2
             return 1
         fi
         chunk_num=$((chunk_num + 1))
     done

     echo -e "${GREEN}Pass 2: Combining into final summary...${NC}"
     echo -e "${YELLOW}(This may take a minute)${NC}"

     # Combine all chunk summaries into one file
     local combined="$TEMP_DIR/combined_summaries.txt"
     if ! cat "$TEMP_DIR"/summary_*.md > "$combined"; then
         echo -e "${RED}Failed to combine chunk summaries${NC}" >&2
         return 1
     fi

     # Build the final prompt
     local prompt="You are creating a final book summary for '$book_name'.

 Below are extracted key points from each section of the book. Synthesize these into a cohesive 15-minute summary.

 Focus on:
 1. Core concepts and principles
 2. Key patterns and techniques
 3. Practical takeaways that can be applied immediately

 Structure with clear headings and bullet points. Remove redundancy and organize logically.

 Keep it under 3000 words.

 IMPORTANT: Output ONLY the raw markdown content. No conversational text or commentary.

 Start with exactly this header:

 # $book_name - 15-Minute Summary

 **Read:** $(date '+%B %Y')

 ---

 Then the organized summary.

 Here are the extracted points to synthesize:

 $(cat "$combined")"

     # Write to a scratch file first so a failed or empty run never clobbers a
     # summary that already exists at $output_file.
     local draft="$TEMP_DIR/final_output.md"

     # Run Claude in print mode. The command is tested directly because, with
     # `set -e` active, a separate `$?` check is never reached when it fails.
     if claude --model "$MODEL" --print "$prompt" > "$draft" \
         && [[ -s "$draft" ]] \
         && mv -f "$draft" "$output_file"; then
         echo -e "${GREEN}Summary saved to: $output_file${NC}"
         return 0
     fi

     echo -e "${RED}Failed to generate summary${NC}" >&2
     return 1
 }

 # Main
 # Validates the arguments, prepares the working directories, then chunks
 # and summarizes the PDF.
 # Globals:   SUMMARY_DIR, SCRIPT_DIR, RED, GREEN, NC (read); TEMP_DIR (written)
 # Arguments: $1 - path to the PDF book
 #            $2 - optional output name (without .md); defaults to the PDF's
 #                 file name, lowercased, with spaces turned into dashes
 # Outputs:   progress on stdout, errors on stderr
 # Returns:   exits with status 1 on invalid input or when a step fails
 main() {
     if [[ $# -lt 1 ]]; then
         usage
     fi

     local pdf_path="$1"
     local output_name="${2:-}"

     # Validate PDF exists
     if [[ ! -f "$pdf_path" ]]; then
         echo -e "${RED}Error: File not found: $pdf_path${NC}" >&2
         exit 1
     fi

     # Accept the extension in any letter case (.pdf, .PDF, .Pdf, ...).
     if [[ ! "$pdf_path" =~ \.[Pp][Dd][Ff]$ ]]; then
         echo -e "${RED}Error: File must be a PDF${NC}" >&2
         exit 1
     fi

     # Check dependencies
     check_deps

     # File name without directory and extension. Stripping with parameter
     # expansion works for every spelling of the extension.
     local stem="${pdf_path##*/}"
     stem="${stem%.*}"
     [[ -n "$stem" ]] || stem="${pdf_path##*/}"

     # The book title is always derived from the file name: dashes and
     # underscores become spaces.
     local book_name="${stem//[-_]/ }"

     # Only the output name can be overridden by the caller.
     if [[ -z "$output_name" ]]; then
         output_name=$(printf '%s' "${stem// /-}" | tr '[:upper:]' '[:lower:]')
     fi

     local output_file="$SUMMARY_DIR/$output_name.md"

     # Create temp directory inside project (so Claude has access). mktemp
     # picks a fresh name, so leftovers of an earlier run are never reused.
     mkdir -p "$SCRIPT_DIR"
     if ! TEMP_DIR=$(mktemp -d "$SCRIPT_DIR/.tmp-chunks-XXXXXX"); then
         echo -e "${RED}Error: Could not create temporary directory${NC}" >&2
         exit 1
     fi
     echo -e "${GREEN}Summarizing: $book_name${NC}"
     echo ""

     # Create summary directory if needed
     mkdir -p "$SUMMARY_DIR"

     # Chunk the PDF. Called as a plain command on purpose: wrapping it in a
     # condition would switch off `set -e` inside the function.
     chunk_pdf "$pdf_path"
     echo ""

     # Generate summary (plain call for the same reason as above)
     generate_summary "$book_name" "$output_file"

     echo ""
     echo -e "${GREEN}Done!${NC}"
     echo "View with: cat '$output_file'"
 }

 # Entry point: hand every command-line argument to main unchanged.
 main "$@"
	#!/bin/bash

	# summarize-book - Create a 15-minute book summary using Claude
	# Usage: summarize-book <path-to-pdf> [output-name]
	#
	# Needs pdftotext, pdfinfo and the claude CLI on PATH (verified by check_deps).

	# Abort on the first unhandled command failure.
	set -e

	# ANSI color escape sequences for terminal output; they are stored as literal
	# text here and only turn into colors when printed with `echo -e`.
	RED='\033[0;31m'
	GREEN='\033[0;32m'
	YELLOW='\033[1;33m'
	NC='\033[0m' # No Color

	# Configuration
	#   SUMMARY_DIR     - directory the final <output-name>.md summary is written to
	#   SCRIPT_DIR      - parent directory of the per-run temporary chunk directory
	#   TEMP_DIR        - per-run temporary directory; assigned in main, removed by cleanup
	#   PAGES_PER_CHUNK - number of PDF pages extracted into each text chunk
	#   MODEL           - model name handed to `claude --model`
	SUMMARY_DIR="$HOME/Personal/arq/book-summaries"
	SCRIPT_DIR="$HOME/Personal/arq"
	TEMP_DIR=""
	PAGES_PER_CHUNK=15
	MODEL="haiku" # Options: haiku, sonnet, opus

	# Remove the temporary working directory, if one was created.
	# Globals: TEMP_DIR, YELLOW, NC (read)
	# Does nothing when TEMP_DIR is empty or no longer points at a directory.
	cleanup() {
	    [[ -z "$TEMP_DIR" ]] && return 0
	    [[ -d "$TEMP_DIR" ]] || return 0

	    echo -e "${YELLOW}Cleaning up temporary files...${NC}"
	    rm -rf "$TEMP_DIR"
	}

	# Register cleanup on the EXIT trap so the temporary directory is removed
	# whenever the script terminates, whether it finished or bailed out early.
	trap cleanup EXIT

	# Check dependencies
	# Verifies that every external tool the script shells out to can be found.
	# Globals: RED, NC (read)
	# Outputs: on failure, a heading plus one line per missing tool (with an
	#          install hint) on stderr; nothing on success
	# Returns: 0 when all tools are present; otherwise exits the script with 1
	check_deps() {
	    local missing=()
	    local tool dep

	    for tool in pdftotext pdfinfo claude; do
	        command -v "$tool" &> /dev/null && continue
	        case "$tool" in
	            claude)
	                missing+=("claude (install: npm install -g @anthropic-ai/claude-code)")
	                ;;
	            *)
	                # pdftotext and pdfinfo both ship with poppler.
	                missing+=("$tool (install: brew install poppler)")
	                ;;
	        esac
	    done

	    if [[ ${#missing[@]} -gt 0 ]]; then
	        # Diagnostics belong on stderr so they survive stdout redirection.
	        echo -e "${RED}Missing dependencies:${NC}" >&2
	        for dep in "${missing[@]}"; do
	            printf ' - %s\n' "$dep" >&2
	        done
	        exit 1
	    fi
	}

	# Print the command-line help text and terminate.
	# Outputs: the help text on stdout
	# Returns: never returns; always exits with status 1
	usage() {
	    local help_lines=(
	        "Usage: summarize-book <path-to-pdf> [output-name]"
	        ""
	        "Arguments:"
	        " path-to-pdf Path to the PDF book to summarize"
	        " output-name (Optional) Name for the summary file (without .md)"
	        ""
	        "Example:"
	        " summarize-book ~/Books/clean-code.pdf"
	        " summarize-book ~/Books/clean-code.pdf clean-code-summary"
	    )

	    printf '%s\n' "${help_lines[@]}"
	    exit 1
	}

	# Extract text from PDF in chunks
	# Globals:   TEMP_DIR, PAGES_PER_CHUNK, RED, GREEN, NC (read)
	# Arguments: $1 - path to the PDF
	# Outputs:   progress on stdout, errors on stderr; writes one
	#            $TEMP_DIR/chunk_NN.txt per page range, each starting with a
	#            "CHUNK n - Pages a to b" header followed by the extracted text
	# Returns:   0 on success; exits the script with status 1 when the page
	#            count cannot be read or a page range cannot be extracted
	chunk_pdf() {
	    local pdf_path="$1"
	    local total_pages

	    # Real pipe into awk (an escaped "\|" would be passed to pdfinfo as a
	    # literal argument). Comparing the field name itself avoids matching
	    # other pdfinfo fields whose value contains "Pages:" (e.g. the title).
	    total_pages=$(pdfinfo "$pdf_path" 2>/dev/null | awk '$1 == "Pages:" { n = $2 } END { print n }')

	    # Anything but a plain number would break the arithmetic below.
	    if [[ ! "$total_pages" =~ ^[0-9]+$ ]]; then
	        echo -e "${RED}Error: Could not determine page count${NC}" >&2
	        exit 1
	    fi

	    echo -e "${GREEN}Processing $total_pages pages...${NC}"

	    local chunk_num=1
	    local start_page=1
	    local end_page chunk_file content

	    while [[ $start_page -le $total_pages ]]; do
	        end_page=$((start_page + PAGES_PER_CHUNK - 1))
	        if [[ $end_page -gt $total_pages ]]; then
	            end_page=$total_pages
	        fi

	        printf -v chunk_file '%s/chunk_%02d.txt' "$TEMP_DIR" "$chunk_num"

	        echo " Extracting pages $start_page-$end_page -> chunk $chunk_num"

	        # stderr is silenced, so report the failure ourselves instead of
	        # letting `set -e` end the script without any explanation.
	        if ! pdftotext -f "$start_page" -l "$end_page" -layout "$pdf_path" "$chunk_file" 2>/dev/null; then
	            echo -e "${RED}Error: Failed to extract pages $start_page-$end_page${NC}" >&2
	            exit 1
	        fi

	        # Prepend the header. The book text goes through '%s' so it is written
	        # verbatim; `echo -e` would expand backslash sequences found in the
	        # text (and a "\c" would cut the chunk short).
	        content=$(< "$chunk_file")
	        printf 'CHUNK %s - Pages %s to %s\n%s\n\n%s\n' \
	            "$chunk_num" "$start_page" "$end_page" \
	            "============================================================" \
	            "$content" > "$chunk_file"

	        start_page=$((end_page + 1))
	        chunk_num=$((chunk_num + 1))
	    done

	    echo -e "${GREEN}Created $((chunk_num - 1)) chunks${NC}"
	}

	# Ask Claude for a bullet-point digest of one chunk of extracted text.
	# Globals:   TEMP_DIR, MODEL (read)
	# Arguments: $1 - chunk text file, $2 - chunk number, $3 - book title
	# Outputs:   writes $TEMP_DIR/summary_NN.md and prints that path on stdout
	# Returns:   0 when claude succeeded and produced a non-empty file, else 1
	summarize_chunk() {
	    local chunk_file="$1" chunk_num="$2" book_name="$3"
	    local out_path="$TEMP_DIR/summary_$(printf '%02d' $chunk_num).md"

	    # The chunk text is appended to the instructions as a single argument.
	    local request="You are summarizing part of a book called '$book_name'.

	Read this text and extract the key points, concepts, and actionable insights.

	IMPORTANT: Output ONLY bullet points with the key information. No intro, no conclusion, just the essential points. Be concise but capture all important ideas.

	$(cat "$chunk_file")"

	    claude --model "$MODEL" --print "$request" > "$out_path" 2>/dev/null
	    local status=$?

	    # Both conditions are required: a clean exit and actual content.
	    if [[ $status -ne 0 || ! -s "$out_path" ]]; then
	        return 1
	    fi

	    echo "$out_path"
	    return 0
	}

	# Generate summary using Claude (two-pass approach)
	# Pass 1 condenses every chunk via summarize_chunk; pass 2 asks Claude to
	# merge those digests into the final document.
	# Globals:   TEMP_DIR, MODEL, RED, GREEN, YELLOW, NC (read)
	# Arguments: $1 - book title, $2 - path of the final summary file
	# Outputs:   progress on stdout, errors on stderr
	# Returns:   0 on success, 1 on any failure; $2 is only replaced on success
	generate_summary() {
	    local book_name="$1"
	    local output_file="$2"

	    echo -e "${GREEN}Pass 1: Summarizing individual chunks...${NC}"

	    local chunk_files=("$TEMP_DIR"/chunk_*.txt)
	    # An unmatched glob is left as the literal pattern; treat it as "nothing
	    # to do" rather than handing a non-existent file to summarize_chunk.
	    if [[ ! -e "${chunk_files[0]}" ]]; then
	        echo -e "${RED}No chunks found to summarize${NC}" >&2
	        return 1
	    fi

	    local total_chunks=${#chunk_files[@]}
	    local chunk_num=1
	    local chunk_file

	    for chunk_file in "${chunk_files[@]}"; do
	        echo -e " Processing chunk $chunk_num of $total_chunks..."
	        if ! summarize_chunk "$chunk_file" "$chunk_num" "$book_name"; then
	            echo -e "${RED}Failed to summarize chunk $chunk_num${NC}" >&2
	            return 1
	        fi
	        chunk_num=$((chunk_num + 1))
	    done

	    echo -e "${GREEN}Pass 2: Combining into final summary...${NC}"
	    echo -e "${YELLOW}(This may take a minute)${NC}"

	    # Combine all chunk summaries into one file
	    local combined="$TEMP_DIR/combined_summaries.txt"
	    if ! cat "$TEMP_DIR"/summary_*.md > "$combined"; then
	        echo -e "${RED}Failed to combine chunk summaries${NC}" >&2
	        return 1
	    fi

	    # Build the final prompt
	    local prompt="You are creating a final book summary for '$book_name'.

	Below are extracted key points from each section of the book. Synthesize these into a cohesive 15-minute summary.

	Focus on:
	1. Core concepts and principles
	2. Key patterns and techniques
	3. Practical takeaways that can be applied immediately

	Structure with clear headings and bullet points. Remove redundancy and organize logically.

	Keep it under 3000 words.

	IMPORTANT: Output ONLY the raw markdown content. No conversational text or commentary.

	Start with exactly this header:

	# $book_name - 15-Minute Summary

	Read: $(date '+%B %Y')

	---

	Then the organized summary.

	Here are the extracted points to synthesize:

	$(cat "$combined")"

	    # Write to a scratch file first so a failed or empty run never clobbers a
	    # summary that already exists at $output_file.
	    local draft="$TEMP_DIR/final_output.md"

	    # Run Claude in print mode. The command is tested directly because, with
	    # `set -e` active, a separate `$?` check is never reached when it fails.
	    if claude --model "$MODEL" --print "$prompt" > "$draft" \
	        && [[ -s "$draft" ]] \
	        && mv -f "$draft" "$output_file"; then
	        echo -e "${GREEN}Summary saved to: $output_file${NC}"
	        return 0
	    fi

	    echo -e "${RED}Failed to generate summary${NC}" >&2
	    return 1
	}

	# Main
	# Validates the arguments, prepares the working directories, then chunks
	# and summarizes the PDF.
	# Globals:   SUMMARY_DIR, SCRIPT_DIR, RED, GREEN, NC (read); TEMP_DIR (written)
	# Arguments: $1 - path to the PDF book
	#            $2 - optional output name (without .md); defaults to the PDF's
	#                 file name, lowercased, with spaces turned into dashes
	# Outputs:   progress on stdout, errors on stderr
	# Returns:   exits with status 1 on invalid input or when a step fails
	main() {
	    if [[ $# -lt 1 ]]; then
	        usage
	    fi

	    local pdf_path="$1"
	    local output_name="${2:-}"

	    # Validate PDF exists
	    if [[ ! -f "$pdf_path" ]]; then
	        echo -e "${RED}Error: File not found: $pdf_path${NC}" >&2
	        exit 1
	    fi

	    # Accept the extension in any letter case (.pdf, .PDF, .Pdf, ...).
	    if [[ ! "$pdf_path" =~ \.[Pp][Dd][Ff]$ ]]; then
	        echo -e "${RED}Error: File must be a PDF${NC}" >&2
	        exit 1
	    fi

	    # Check dependencies
	    check_deps

	    # File name without directory and extension. Parameter expansion needs
	    # no pipeline and works for every spelling of the extension.
	    local stem="${pdf_path##*/}"
	    stem="${stem%.*}"
	    [[ -n "$stem" ]] || stem="${pdf_path##*/}"

	    # The book title is always derived from the file name: dashes and
	    # underscores become spaces.
	    local book_name="${stem//[-_]/ }"

	    # Only the output name can be overridden by the caller.
	    if [[ -z "$output_name" ]]; then
	        output_name=$(printf '%s' "${stem// /-}" | tr '[:upper:]' '[:lower:]')
	    fi

	    local output_file="$SUMMARY_DIR/$output_name.md"

	    # Create temp directory inside project (so Claude has access). mktemp
	    # picks a fresh name, so leftovers of an earlier run are never reused.
	    mkdir -p "$SCRIPT_DIR"
	    if ! TEMP_DIR=$(mktemp -d "$SCRIPT_DIR/.tmp-chunks-XXXXXX"); then
	        echo -e "${RED}Error: Could not create temporary directory${NC}" >&2
	        exit 1
	    fi
	    echo -e "${GREEN}Summarizing: $book_name${NC}"
	    echo ""

	    # Create summary directory if needed
	    mkdir -p "$SUMMARY_DIR"

	    # Chunk the PDF. Called as a plain command on purpose: wrapping it in a
	    # condition would switch off `set -e` inside the function.
	    chunk_pdf "$pdf_path"
	    echo ""

	    # Generate summary (plain call for the same reason as above)
	    generate_summary "$book_name" "$output_file"

	    echo ""
	    echo -e "${GREEN}Done!${NC}"
	    echo "View with: cat '$output_file'"
	}

	# Entry point: hand every command-line argument to main unchanged.
	main "$@"
No results found