Created
January 30, 2026 15:20
-
-
Save chengscott/aae19d4df17fe139a7a8df3c4259c91a to your computer and use it in GitHub Desktop.
sudo fail2ban-regex /var/log/nginx/error.log /etc/fail2ban/filter.d/nginx-botsearch.conf --print-all-missed | python3 missed_prefix.py --top 30 --max-depth 4 --min-count 2
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import sys | |
| import re | |
| import argparse | |
| from collections import Counter | |
| # Nginx error log style: | |
| # ... request: "GET /path?x=y HTTP/1.1" | |
| RE_ERR = re.compile(r'request:\s*"([A-Z]+)\s+(\S+)\s+HTTP/[^"]+"') | |
| # Generic access-log style embedded in a line: | |
| # ..."GET /path?x=y HTTP/1.1"... | |
| RE_ACC = re.compile(r'"([A-Z]+)\s+(\S+)\s+HTTP/[^"]+"') | |
| def extract_path(line: str) -> str | None: | |
| m = RE_ERR.search(line) or RE_ACC.search(line) | |
| if not m: | |
| return None | |
| path = m.group(2) | |
| # If something like http://host/path sneaks in, keep only the path | |
| if "://" in path: | |
| try: | |
| path = "/" + path.split("://", 1)[1].split("/", 1)[1] | |
| except Exception: | |
| return None | |
| # Drop query/fragment | |
| path = path.split("?", 1)[0].split("#", 1)[0] | |
| # Normalize | |
| if not path.startswith("/"): | |
| path = "/" + path | |
| return path | |
def gen_prefixes(path: str, max_depth: int):
    """Yield the cumulative prefixes of *path*: ``/a``, ``/a/b``, ``/a/b/c``...

    Empty segments (leading, trailing, or doubled slashes) are ignored, and
    at most ``max_depth`` prefixes are produced. Nothing is yielded when the
    path has no segments or ``max_depth`` is zero or negative.
    """
    pieces = [piece for piece in path.split("/") if piece != ""]
    # Clamp at zero so a negative depth yields nothing instead of slicing
    # from the end of the list.
    limit = max(0, min(max_depth, len(pieces)))
    prefix = ""
    for piece in pieces[:limit]:
        prefix += "/" + piece
        yield prefix
def main():
    """Read log lines from stdin and print the most common path prefixes.

    Each line is run through ``extract_path``; every prefix produced by
    ``gen_prefixes`` is tallied in a per-depth counter. One ranked table is
    printed per depth, ordered by descending count and then by prefix.
    """
    parser = argparse.ArgumentParser(
        description="Summarize most common URL path prefixes from fail2ban-regex --print-all-missed output."
    )
    parser.add_argument("--top", type=int, default=20, help="How many prefixes to show per depth.")
    parser.add_argument("--max-depth", type=int, default=4, help="Prefix depth in path segments.")
    parser.add_argument("--min-count", type=int, default=1, help="Only show prefixes with count >= this.")
    opts = parser.parse_args()

    # Index 0 is never filled; it lets a prefix's depth be used as the index.
    tallies = [Counter() for _ in range(opts.max_depth + 1)]

    n_lines = 0
    n_paths = 0
    for n_lines, raw in enumerate(sys.stdin, start=1):
        req_path = extract_path(raw)
        if not req_path:
            continue
        n_paths += 1
        for prefix in gen_prefixes(req_path, opts.max_depth):
            # The number of slashes equals the depth: "/a" => 1, "/a/b" => 2
            tallies[prefix.count("/")][prefix] += 1

    print(f"Read {n_lines} lines, extracted {n_paths} request paths.\n")

    for depth in range(1, opts.max_depth + 1):
        ranked = sorted(
            (
                (count, prefix)
                for prefix, count in tallies[depth].items()
                if count >= opts.min_count
            ),
            key=lambda pair: (-pair[0], pair[1]),
        )
        if not ranked:
            continue
        print(f"=== Top prefixes at depth {depth} ===")
        for count, prefix in ranked[: opts.top]:
            print(f"{count:6d} {prefix}")
        print()


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment