Created
February 15, 2026 12:02
-
-
Save graysky2/8035291d1bf87b8fe3693668965337e1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| AdGuard Home Blocklist Usage Analyzer | |
| Analyzes query logs to determine which blocklists are actually being used. | |
| """ | |
| import json | |
| import os | |
| from pathlib import Path | |
| from collections import defaultdict | |
def load_blocklists(filters_dir):
    """Map every blocklist file in *filters_dir* to the key used for hit counting.

    Each regular ``*.txt`` file is treated as one blocklist. When the file
    stem parses as an integer (e.g. ``1769458712.txt`` -> ``1769458712``)
    that integer is used as the key; otherwise the full filename is the key.

    Args:
        filters_dir: Path of the directory holding the blocklist files.

    Returns:
        A dict mapping filter ID (``int``) or filename (``str``) to the
        filename. The dict is empty when the directory does not exist, is
        not a directory, or contains no ``.txt`` files.
    """
    blocklists = {}
    filters_path = Path(filters_dir)

    # is_dir() also rejects a regular file passed by mistake; exists() alone
    # would let it through and report a misleading "no .txt files" warning.
    if not filters_path.is_dir():
        print(f"Error: Filters directory '{filters_dir}' not found")
        return blocklists

    # Sorted so that the listing and the dict's insertion order do not
    # depend on filesystem enumeration order; non-files are ignored.
    filter_files = sorted(
        path for path in filters_path.glob("*.txt") if path.is_file()
    )
    if not filter_files:
        print(f"Warning: No .txt files found in '{filters_dir}'")
        return blocklists

    print(f"Loading {len(filter_files)} blocklists...")
    for filter_file in filter_files:
        # Extract FilterListID from filename (e.g., "1769458712.txt" -> 1769458712)
        try:
            filter_id = int(filter_file.stem)
        except ValueError:
            # Filename doesn't have a numeric ID, use filename as key
            blocklists[filter_file.name] = filter_file.name
            print(f" {filter_file.name} (no numeric ID)")
        else:
            blocklists[filter_id] = filter_file.name
            print(f" {filter_file.name} (ID: {filter_id})")

    return blocklists
def analyze_querylog(log_file, blocklists):
    """Count, per filter list, how many logged rules reference it.

    The log is read line by line, each non-blank line being parsed as one
    JSON object. An entry is treated as blocked when its ``Result`` object
    is non-empty and has a ``Rules`` key; every rule in it that carries a
    ``FilterListID`` adds one hit for that ID.

    Args:
        log_file: Path of the query log file.
        blocklists: Mapping whose keys are the known filter IDs. Each key is
            seeded with 0 hits so lists that never match still appear in
            the result.

    Returns:
        A ``defaultdict(int)`` mapping filter ID to hit count, or ``None``
        when the log file is missing or cannot be read.
    """
    if not os.path.exists(log_file):
        print(f"Error: Query log '{log_file}' not found")
        return None

    # Initialize all blocklists with 0 hits
    blocklist_hits = defaultdict(int, dict.fromkeys(blocklists, 0))
    total_blocked = 0
    skipped = 0
    # Defined before the loop so an empty log reports 0 entries instead of
    # failing on an unbound loop variable in the summary below.
    line_num = 0

    print(f"\nAnalyzing query log: {log_file}")

    try:
        with open(log_file, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if line:
                    try:
                        entry = json.loads(line)
                        # Check if this query was blocked
                        result = entry.get('Result', {})
                        if result and 'Rules' in result:
                            total_blocked += 1
                            # Extract FilterListIDs from all rules
                            for rule in result['Rules']:
                                filter_id = rule.get('FilterListID')
                                if filter_id is not None:
                                    blocklist_hits[filter_id] += 1
                    except (ValueError, TypeError, AttributeError, RecursionError):
                        # Malformed JSON or an entry with an unexpected
                        # shape: keep going, but remember it was skipped.
                        skipped += 1

                if line_num % 100000 == 0:
                    print(f" Processed {line_num} entries...")

        print(f" Total entries processed: {line_num:,}")
        print(f" Total blocked queries: {total_blocked:,}")
        if skipped:
            print(f" Skipped malformed entries: {skipped:,}")
    except (OSError, UnicodeError) as e:
        print(f"Error reading query log: {e}")
        return None

    return blocklist_hits
def print_report(blocklist_hits, blocklists):
    """Write the usage report to standard output.

    Lists with at least one hit are shown first, highest count first,
    followed by the lists that never matched. IDs that have no entry in
    *blocklists* are shown by their ID instead of a filename.

    Args:
        blocklist_hits: Mapping of filter ID to hit count.
        blocklists: Mapping of filter ID to blocklist filename.
    """
    banner = "=" * 60
    divider = "-" * 60

    print("\n" + banner)
    print("BLOCKLIST USAGE REPORT")
    print(banner)

    # Highest hit count first; the sort is stable, so ties keep input order.
    ranked = sorted(blocklist_hits.items(), key=lambda pair: pair[1], reverse=True)

    # Split the ranked entries into "matched at least once" and "never matched".
    active = []
    idle = []
    for list_id, count in ranked:
        if count > 0:
            active.append((list_id, count))
        elif count == 0:
            idle.append(list_id)

    if active:
        print(f"\nUSED BLOCKLISTS ({len(active)}):")
        print(divider)
        for list_id, count in active:
            label = blocklists.get(list_id, str(list_id))
            print(f" {label}: {count:,} blocks")

    if not idle:
        print("\nAll blocklists are being used!")
    else:
        print(f"\nUNUSED BLOCKLISTS ({len(idle)}):")
        print(divider)
        for list_id in idle:
            label = blocklists.get(list_id, str(list_id))
            print(f" {label}")

    print("\n" + banner)
def main(data_dir="./data"):
    """Load the blocklists, analyze the query log, and print the report.

    Args:
        data_dir: Directory that holds the ``filters`` subdirectory and the
            ``querylog.json`` file. Defaults to ``./data``, the location
            that was previously hard-coded.
    """
    # Configuration
    filters_dir = os.path.join(data_dir, "filters")
    log_file = os.path.join(data_dir, "querylog.json")

    print("AdGuard Home Blocklist Usage Analyzer")
    print("="*60)
    print(f"Data directory: {data_dir}")
    print(f"Filters directory: {filters_dir}")
    print(f"Query log: {log_file}")
    print()

    # Load blocklists
    blocklists = load_blocklists(filters_dir)
    if not blocklists:
        print("No blocklists loaded. Exiting.")
        return

    # Analyze query log
    blocklist_hits = analyze_querylog(log_file, blocklists)
    if blocklist_hits is None:
        print("Failed to analyze query log. Exiting.")
        return

    # Print report
    print_report(blocklist_hits, blocklists)


if __name__ == "__main__":
    import sys

    # An optional first command-line argument overrides the data directory;
    # with no argument the slice is empty and main() uses its default.
    main(*sys.argv[1:2])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment