pradyunsg · December 19, 2025 15:07
diff --git a/lookup.sh b/lookup.sh
 #!/usr/bin/env bash
 #
 # lookup.sh - find sentences that mention a phrase in PEP source files.
 #
 # Searches peps/pep-NNNN.rst (every file matching peps/pep-*.rst, or only the
 # PEP numbers given on the command line) with ripgrep and prints one Markdown
 # task-list item per matching sentence, each linking to the matching line in
 # the python/peps repository on GitHub.
 #
 # Requires: rg (invoked with --pcre2) and jq.

 # Abort on command failure, on use of unset variables, and when any stage of
 # a pipeline fails.
 set -euo pipefail

 # Print the usage text and terminate the script.
 #
 # Arguments: none
 # Outputs:   usage text on stderr (nothing on stdout)
 # Exits:     status 2; never returns to the caller
 usage() {
    # printf rather than a here-document: a here-document terminator has to
    # start at column 0, so it stops working as soon as this block carries any
    # leading indentation. printf is insensitive to where the block sits.
    printf '%s\n' \
        'Usage: lookup.sh <search-phrase> [PEP ...]' \
        '' \
        'Search for sentence-level matches of <search-phrase> in PEP files.' \
        'Optional PEP numbers are decimal numbers (e.g., 8, 123) without leading zeros.' \
        'Examples:' \
        '  lookup.sh "async" 8 492' \
        '  lookup.sh "type hint"' >&2
    exit 2
 }

 # --- Argument handling -----------------------------------------------------
 # Sets two globals consumed by the search below:
 #   phrase - the text to look for (first argument)
 #   files  - array of PEP source paths to search

 # Require at least one argument (search phrase)
 if (( $# < 1 )); then
    echo "Error: missing search phrase." >&2
    usage
 fi

 # First argument: search phrase
 phrase="$1"
 shift

 # An empty phrase would make the sentence pattern match every sentence in
 # every file, which is never what the caller wants.
 if [[ -z "$phrase" ]]; then
    echo "Error: empty search phrase." >&2
    usage
 fi

 # Remaining arguments: optional PEP numbers
 files=()
 if (( $# > 0 )); then
    # Build file list from provided PEP numbers, adding leading zeros
    for pep in "$@"; do
        if ! [[ "$pep" =~ ^[0-9]+$ ]]; then
            echo "Error: invalid PEP number: $pep" >&2
            usage
        fi
        # Force base 10. A bare leading zero makes printf read the argument
        # as octal: "010" would select PEP 8 and "08" would be rejected as an
        # invalid number. 10#... makes "010" mean PEP 10 and "08" mean PEP 8.
        printf -v pep_file 'peps/pep-%04d.rst' "$((10#$pep))"
        files+=( "$pep_file" )
    done
 else
    # Default: all PEP files under peps/
    files=(peps/pep-*.rst)
    # Without nullglob an unmatched glob stays as the literal pattern; catch
    # that here instead of handing a non-existent path to rg.
    if [[ ! -e "${files[0]-}" ]]; then
        echo "Error: no files match peps/pep-*.rst in the current directory." >&2
        exit 1
    fi
 fi

 # Run ripgrep with JSON output for sentence-level matches, then turn every
 # match into a Markdown task-list line with jq.
 #
 # Reads:   phrase (search text), files (array of paths to search)
 # Outputs: one "- [ ] [PEP n, ~line l](url): [sentence]" line per match
 #
 # The sentence pattern is written in PCRE2 extended mode ((?x)) so it can be
 # laid out and commented. Extended mode ignores unescaped whitespace and
 # treats "#" as a comment start, so the user phrase is wrapped in (?-x:...):
 # spaces inside it stay significant ("type hint" no longer searches for
 # "typehint"), a "#" in it is literal, and a top-level "|" in it can no
 # longer split the surrounding sentence pattern. The phrase is still
 # interpreted as a regular expression, as before.
 rg --pcre2 \
   --multiline \
   --with-filename \
   --only-matching \
   --ignore-case \
   --json \
   "(?xs)
      (?:^|(?<=[.!?]))   # sentence start
      \s*                # optional whitespace
      (?:(?!\n\s*\n).)*? # leading sentence text (don't cross empty lines)
      (?-x:${phrase})    # target phrase, extended mode switched off
      [^.!?]*            # trailing sentence text
      (?:[.!?]|$)        # sentence end
   " \
   "${files[@]}" \
 | jq -r '
    # Only consider ripgrep "match" events
    select(.type == "match")
    | .data as $d

    # Extract the PEP number from the filename (e.g., "pep-0639.rst" -> "0639")
    | ($d.path.text | capture("pep-(?<n>\\d+)\\.rst").n) as $pep

    # Normalize PEP display (strip leading zeros)
    | ($pep | tonumber | tostring) as $pep_disp

    # Save commonly used fields
    | $d.path.text as $file
    | $d.line_number as $block_line

    # For each submatch...
    | $d.submatches[] as $sm

    # Clean up the matched sentence text
    | ($sm.match.text
        | gsub("^\\s+|\\s+$"; "")    # strip leading/trailing whitespace
        | gsub("\\s+"; " ")          # collapse internal whitespace
      ) as $s

    # Line number of the first visible character of the sentence:
    #   $before - newlines in the reported lines ahead of this submatch
    #   $lead   - newlines inside the whitespace the submatch starts with
    #             (all of it, so a start such as " \n  Text" is counted too)
    # NOTE(review): ripgrep reports submatch offsets in bytes while jq slices
    # strings by code point, so $before may be off when non-ASCII text comes
    # before the submatch - confirm whether that matters for the PEP sources.
    | ($d.lines.text[0:$sm.start] | gsub("[^\\n]"; "") | length) as $before
    | ($sm.match.text | capture("^(?<ws>\\s*)").ws | gsub("[^\\n]"; "") | length) as $lead
    | ($block_line + $before + $lead) as $line

    # Emit a Markdown link to the exact file + line on GitHub
    | "- [ ] [PEP \($pep_disp), ~line \($line)](https://github.com/python/peps/blob/main/\($file)?plain=1#L\($line)): [\($s)]"
 '
	#!/usr/bin/env bash
	#
	# lookup.sh - find sentences that mention a phrase in PEP source files.
	#
	# Searches peps/pep-NNNN.rst (every file matching peps/pep-*.rst, or only the
	# PEP numbers given on the command line) with ripgrep and prints one Markdown
	# task-list item per matching sentence, each linking to the matching line in
	# the python/peps repository on GitHub.
	#
	# Requires: rg (invoked with --pcre2) and jq.

	# Abort on command failure, on use of unset variables, and when any stage of
	# a pipeline fails.
	set -euo pipefail

	# Print the usage text and terminate the script.
	#
	# Arguments: none
	# Outputs:   usage text on stderr (nothing on stdout)
	# Exits:     status 2; never returns to the caller
	usage() {
		# printf rather than a here-document: with a plain << the terminator
		# has to start at column 0, so an indented terminator leaves the
		# here-document unclosed. printf works at any indentation.
		printf '%s\n' \
			'Usage: lookup.sh <search-phrase> [PEP ...]' \
			'' \
			'Search for sentence-level matches of <search-phrase> in PEP files.' \
			'Optional PEP numbers are decimal numbers (e.g., 8, 123) without leading zeros.' \
			'Examples:' \
			'  lookup.sh "async" 8 492' \
			'  lookup.sh "type hint"' >&2
		exit 2
	}

	# --- Argument handling ---------------------------------------------------
	# Sets two globals consumed by the search below:
	#   phrase - the text to look for (first argument)
	#   files  - array of PEP source paths to search

	# Require at least one argument (search phrase)
	if (( $# < 1 )); then
		echo "Error: missing search phrase." >&2
		usage
	fi

	# First argument: search phrase
	phrase="$1"
	shift

	# An empty phrase would make the sentence pattern match every sentence in
	# every file, which is never what the caller wants.
	if [[ -z "$phrase" ]]; then
		echo "Error: empty search phrase." >&2
		usage
	fi

	# Remaining arguments: optional PEP numbers
	files=()
	if (( $# > 0 )); then
		# Build file list from provided PEP numbers, adding leading zeros
		for pep in "$@"; do
			if ! [[ "$pep" =~ ^[0-9]+$ ]]; then
				echo "Error: invalid PEP number: $pep" >&2
				usage
			fi
			# Force base 10. A bare leading zero makes printf read the
			# argument as octal: "010" would select PEP 8 and "08" would be
			# rejected. 10#... makes "010" mean PEP 10 and "08" mean PEP 8.
			printf -v pep_file 'peps/pep-%04d.rst' "$((10#$pep))"
			files+=( "$pep_file" )
		done
	else
		# Default: all PEP files under peps/
		files=(peps/pep-*.rst)
		# Without nullglob an unmatched glob stays as the literal pattern;
		# catch that here instead of handing a non-existent path to rg.
		if [[ ! -e "${files[0]-}" ]]; then
			echo "Error: no files match peps/pep-*.rst in the current directory." >&2
			exit 1
		fi
	fi

	# Run ripgrep with JSON output for sentence-level matches, then turn every
	# match into a Markdown task-list line with jq.
	#
	# Reads:   phrase (search text), files (array of paths to search)
	# Outputs: one "- [ ] [PEP n, ~line l](url): [sentence]" line per match
	#
	# The sentence pattern is written in PCRE2 extended mode ((?x)) so it can be
	# laid out and commented. Extended mode ignores unescaped whitespace and
	# treats "#" as a comment start, so the user phrase is wrapped in (?-x:...):
	# spaces inside it stay significant ("type hint" no longer searches for
	# "typehint"), a "#" in it is literal, and a top-level "|" in it can no
	# longer split the surrounding sentence pattern. The phrase is still
	# interpreted as a regular expression.
	#
	# Pipes and alternations are written as a plain "|" (a backslash-escaped
	# pipe is a literal character to both the shell and the regex engine), and
	# the "*" quantifiers of the leading-text group are present.
	rg --pcre2 \
		--multiline \
		--with-filename \
		--only-matching \
		--ignore-case \
		--json \
		"(?xs)
			(?:^|(?<=[.!?]))   # sentence start
			\s*                # optional whitespace
			(?:(?!\n\s*\n).)*? # leading sentence text (don't cross empty lines)
			(?-x:${phrase})    # target phrase, extended mode switched off
			[^.!?]*            # trailing sentence text
			(?:[.!?]|$)        # sentence end
		" \
		"${files[@]}" \
	| jq -r '
		# Only consider ripgrep "match" events
		select(.type == "match")
		| .data as $d

		# Extract the PEP number from the filename (e.g., "pep-0639.rst" -> "0639")
		| ($d.path.text | capture("pep-(?<n>\\d+)\\.rst").n) as $pep

		# Normalize PEP display (strip leading zeros)
		| ($pep | tonumber | tostring) as $pep_disp

		# Save commonly used fields
		| $d.path.text as $file
		| $d.line_number as $block_line

		# For each submatch...
		| $d.submatches[] as $sm

		# Clean up the matched sentence text
		| ($sm.match.text
			| gsub("^\\s+|\\s+$"; "")    # strip leading/trailing whitespace
			| gsub("\\s+"; " ")          # collapse internal whitespace
		  ) as $s

		# Line number of the first visible character of the sentence:
		#   $before - newlines in the reported lines ahead of this submatch
		#   $lead   - newlines inside the whitespace the submatch starts with
		#             (all of it, so a start such as " \n  Text" is counted too)
		# NOTE(review): ripgrep reports submatch offsets in bytes while jq slices
		# strings by code point, so $before may be off when non-ASCII text comes
		# before the submatch - confirm whether that matters for the PEP sources.
		| ($d.lines.text[0:$sm.start] | gsub("[^\\n]"; "") | length) as $before
		| ($sm.match.text | capture("^(?<ws>\\s*)").ws | gsub("[^\\n]"; "") | length) as $lead
		| ($block_line + $before + $lead) as $line

		# Emit a Markdown link to the exact file + line on GitHub
		| "- [ ] [PEP \($pep_disp), ~line \($line)](https://github.com/python/peps/blob/main/\($file)?plain=1#L\($line)): [\($s)]"
	'
No results found