Skip to content

Instantly share code, notes, and snippets.

@chengscott
Created January 30, 2026 15:20
Show Gist options
  • Select an option

  • Save chengscott/aae19d4df17fe139a7a8df3c4259c91a to your computer and use it in GitHub Desktop.

Select an option

Save chengscott/aae19d4df17fe139a7a8df3c4259c91a to your computer and use it in GitHub Desktop.
sudo fail2ban-regex /var/log/nginx/error.log /etc/fail2ban/filter.d/nginx-botsearch.conf --print-all-missed | python3 missed_prefix.py --top 30 --max-depth 4 --min-count 2
#!/usr/bin/env python3
import sys
import re
import argparse
from collections import Counter
# Nginx error log style:
# ... request: "GET /path?x=y HTTP/1.1"
RE_ERR = re.compile(r'request:\s*"([A-Z]+)\s+(\S+)\s+HTTP/[^"]+"')
# Generic access-log style embedded in a line:
# ..."GET /path?x=y HTTP/1.1"...
RE_ACC = re.compile(r'"([A-Z]+)\s+(\S+)\s+HTTP/[^"]+"')
def extract_path(line: str) -> str | None:
m = RE_ERR.search(line) or RE_ACC.search(line)
if not m:
return None
path = m.group(2)
# If something like http://host/path sneaks in, keep only the path
if "://" in path:
try:
path = "/" + path.split("://", 1)[1].split("/", 1)[1]
except Exception:
return None
# Drop query/fragment
path = path.split("?", 1)[0].split("#", 1)[0]
# Normalize
if not path.startswith("/"):
path = "/" + path
return path
def gen_prefixes(path: str, max_depth: int):
    """Yield cumulative path prefixes: ``/a``, ``/a/b``, ``/a/b/c``, ...

    Empty segments (from leading, trailing, or doubled slashes) are ignored.
    At most ``max_depth`` prefixes are produced; a non-positive ``max_depth``
    or a path with no segments yields nothing.
    """
    parts = list(filter(None, path.split("/")))
    prefix = ""
    # Clamp at zero so a negative depth does not turn into a tail-trimming slice.
    for part in parts[: max(0, max_depth)]:
        prefix += "/" + part
        yield prefix
def main():
    """Read log lines from stdin and print the most frequent path prefixes.

    For every line with a recognizable request, each prefix of its path (up
    to ``--max-depth`` segments) is counted. A summary line is printed first,
    then for each depth that has any prefix with at least ``--min-count``
    hits, the ``--top`` entries ordered by descending count, ties broken by
    prefix.
    """
    parser = argparse.ArgumentParser(
        description="Summarize most common URL path prefixes from fail2ban-regex --print-all-missed output."
    )
    parser.add_argument("--top", type=int, default=20, help="How many prefixes to show per depth.")
    parser.add_argument("--max-depth", type=int, default=4, help="Prefix depth in path segments.")
    parser.add_argument("--min-count", type=int, default=1, help="Only show prefixes with count >= this.")
    opts = parser.parse_args()
    max_depth = opts.max_depth

    # One counter per depth; index 0 is never used so that index == depth.
    tallies = [Counter() for _ in range(max_depth + 1)]
    n_lines = 0
    n_paths = 0
    for n_lines, raw in enumerate(sys.stdin, start=1):
        path = extract_path(raw)
        if path:
            n_paths += 1
            # Prefixes arrive shallowest first, so position equals depth.
            for level, prefix in enumerate(gen_prefixes(path, max_depth), start=1):
                tallies[level][prefix] += 1

    print(f"Read {n_lines} lines, extracted {n_paths} request paths.\n")

    for level in range(1, max_depth + 1):
        ranked = sorted(
            ((prefix, hits) for prefix, hits in tallies[level].items() if hits >= opts.min_count),
            key=lambda pair: (-pair[1], pair[0]),
        )
        if ranked:
            print(f"=== Top prefixes at depth {level} ===")
            for prefix, hits in ranked[: opts.top]:
                print(f"{hits:6d} {prefix}")
            print()
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment