Created
February 5, 2026 11:14
-
-
Save monperrus/bec6b9e3e7140c8ee978f7b5d6e72918 to your computer and use it in GitHub Desktop.
Script to identify all <repository> and <pluginRepository> in a Maven dependency tree.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Script to identify all <repository> and <pluginRepository> in a Maven dependency tree. | |
| Usage: | |
| python maven_repo_finder.py <pom.xml or directory> | |
| Author: Martin Monperrus | |
| """ | |
| import xml.etree.ElementTree as ET | |
| import sys | |
| from pathlib import Path | |
| import tempfile | |
| import os | |
| import tempfile | |
| import subprocess | |
| import json | |
def find_repositories_and_dependencies(xml_file, visited=None):
    """
    Parse a Maven pom.xml file, extract repositories, and recursively inspect dependencies.

    Each declared dependency is looked up in the local ``~/.m2/repository``;
    when its POM is present there, it is processed recursively and the
    repositories it declares are merged into the result for this file.

    Args:
        xml_file: Path to the pom.xml file
        visited: Set of already visited ``groupId:artifactId:version`` keys,
            shared across the recursion to avoid cycles
    Returns:
        Dictionary with 'repositories', 'pluginRepositories' and 'dependencies'
        entries, or None when the artifact was already visited
    Raises:
        Exception: any error raised while parsing or processing is reported on
            stderr and then re-raised
    """
    if visited is None:
        visited = set()

    # Maven namespace
    ns = {'mvn': 'http://maven.apache.org/POM/4.0.0'}

    def child_text(element, tag):
        """Return the text of a direct child, with or without the Maven namespace."""
        if element is None:
            return None
        return element.findtext(tag) or element.findtext(f'mvn:{tag}', namespaces=ns)

    try:
        root = ET.parse(xml_file).getroot()

        # Compare with None explicitly: truth-testing an Element is deprecated
        # and is False for an element without children.
        parent = root.find('parent')
        if parent is None:
            parent = root.find('mvn:parent', namespaces=ns)

        # groupId and version are inherited from <parent> when not declared
        group_id = child_text(root, 'groupId') or child_text(parent, 'groupId')
        artifact_id = child_text(root, 'artifactId')
        version = child_text(root, 'version') or child_text(parent, 'version')

        artifact_key = f"{group_id}:{artifact_id}:{version}"
        if artifact_key in visited:
            return None
        visited.add(artifact_key)

        def resolve_placeholders(value):
            """Substitute the ${project.*} placeholders that can be resolved from this POM."""
            if not value:
                return value
            substitutions = (
                ('${project.version}', version),
                ('${project.groupId}', group_id),
            )
            for placeholder, replacement in substitutions:
                if replacement and placeholder in value:
                    value = value.replace(placeholder, replacement)
            return value

        dependencies = root.findall('.//dependency') or root.findall('.//mvn:dependency', ns)
        m2_repo = Path.home() / '.m2' / 'repository'

        dep_results = []
        # (path of the dependency POM, result of processing it), kept in order
        resolved = []
        for dep in dependencies:
            dep_group = resolve_placeholders(child_text(dep, 'groupId'))
            dep_artifact = child_text(dep, 'artifactId')
            dep_version = child_text(dep, 'version')

            # Version ranges such as [1.0,2.0), [1.0] or (,1.0]: use the first
            # bound that is actually given.
            if dep_version and dep_version.startswith(('[', '(')):
                bounds = [bound.strip() for bound in dep_version.strip('[]()').split(',')]
                dep_version = next((bound for bound in bounds if bound), None)
            dep_version = resolve_placeholders(dep_version)

            if not (dep_group and dep_artifact and dep_version):
                continue

            dep_info = {
                'groupId': dep_group,
                'artifactId': dep_artifact,
                'version': dep_version
            }
            dep_results.append(dep_info)

            # Look for the dependency's POM in the local .m2 repository
            local_pom = (
                m2_repo / dep_group.replace('.', '/') / dep_artifact / dep_version
                / f"{dep_artifact}-{dep_version}.pom"
            )
            if local_pom.exists():
                dep_data = find_repositories_and_dependencies(str(local_pom), visited)
                dep_info['local_repositories'] = dep_data
                if dep_data:
                    resolved.append((str(local_pom), dep_data))

        data = find_repositories(xml_file)
        data['dependencies'] = dep_results

        # Merge what the dependencies declare, keeping the two kinds separate.
        for pom_path, dep_data in resolved:
            for key in ('repositories', 'pluginRepositories'):
                for repo in dep_data.get(key, []):
                    # Entries coming from deeper levels already name their own POM
                    repo.setdefault('source_file', pom_path)
                    data[key].append(repo)
        return data
    except Exception as e:
        print(f"Error processing dependencies in {xml_file}: {e}", file=sys.stderr)
        raise
def find_repositories(xml_file):
    """
    Collect every <repository> and <pluginRepository> declared in a pom.xml.

    Elements are first searched without a namespace; only when neither kind is
    found that way is the search repeated with the Maven POM namespace. Each
    entry is returned as a dict mapping the (namespace-free) child tag names to
    their text.

    Args:
        xml_file: Path to the pom.xml file
    Returns:
        Dictionary with 'repositories' and 'pluginRepositories' lists; the
        lists are empty when the file cannot be read or parsed (the error is
        reported on stderr)
    """
    found = {
        'repositories': [],
        'pluginRepositories': []
    }
    maven_ns = {'mvn': 'http://maven.apache.org/POM/4.0.0'}
    try:
        project = ET.parse(xml_file).getroot()

        # Some pom files do not use the namespace, so try the bare tags first
        plain_repos = project.findall('.//repository')
        plain_plugin_repos = project.findall('.//pluginRepository')
        if plain_repos or plain_plugin_repos:
            groups = (
                ('repositories', plain_repos),
                ('pluginRepositories', plain_plugin_repos),
            )
        else:
            groups = (
                ('repositories', project.findall('.//mvn:repository', maven_ns)),
                ('pluginRepositories', project.findall('.//mvn:pluginRepository', maven_ns)),
            )

        for key, elements in groups:
            for element in elements:
                # Drop the '{namespace}' prefix from each child tag, if present
                found[key].append(
                    {field.tag.split('}')[-1]: field.text for field in element}
                )
    except ET.ParseError as err:
        print(f"Error parsing {xml_file}: {err}", file=sys.stderr)
    except Exception as err:
        print(f"Error processing {xml_file}: {err}", file=sys.stderr)
    return found
def scan_directory(directory):
    """
    Recursively scan a directory for pom.xml files and report their repositories.

    For every pom.xml found (except those below a ``resources`` folder), Maven
    is first asked to download the dependencies so their POMs are available
    locally; this step is best effort and only warns on failure. The
    repositories found for the file are then printed as JSON, one per line.

    Args:
        directory: Root directory to scan
    Returns:
        Dictionary mapping each processed pom.xml path (as a string) to the
        result of find_repositories_and_dependencies for that file
    """
    all_results = {}
    for pom_file in Path(directory).rglob('pom.xml'):
        # Skip pom.xml files in resources folders
        if 'resources' in pom_file.parts:
            continue

        # Run mvn dependency:go-offline to fetch dependencies locally
        try:
            # mvn runs with cwd set to the module directory, so the -f argument
            # must be absolute: a relative path would be resolved against cwd.
            pom_dir = pom_file.parent.resolve()
            print(f" Fetching dependencies for {pom_file}...")
            completed = subprocess.run(
                ['mvn', 'dependency:go-offline', '-f', str(pom_dir / pom_file.name)],
                cwd=pom_dir,
                capture_output=True,
                timeout=300
            )
            if completed.returncode != 0:
                print(
                    f" Warning: mvn exited with code {completed.returncode} for {pom_file}",
                    file=sys.stderr,
                )
        except subprocess.TimeoutExpired:
            print(f" Warning: Timeout fetching dependencies for {pom_file}", file=sys.stderr)
        except Exception as e:
            print(f" Warning: Could not fetch dependencies for {pom_file}: {e}", file=sys.stderr)

        print(f"\nProcessing: {pom_file}")
        try:
            results = find_repositories_and_dependencies(pom_file)
        except Exception as e:
            # One broken pom.xml must not abort the scan of the remaining files
            print(f" Warning: Skipping {pom_file}: {e}", file=sys.stderr)
            continue
        if not results:
            continue

        all_results[str(pom_file)] = results
        repositories = results.get('repositories')
        if repositories:
            print(f" Found {len(repositories)} repositories:")
            for repo in repositories:
                print(json.dumps(repo))
        plugin_repositories = results.get('pluginRepositories')
        if plugin_repositories:
            print(f" Found {len(plugin_repositories)} plugin repositories:")
            for repo in plugin_repositories:
                print(json.dumps(repo))
    return all_results
def main():
    """
    Command-line entry point.

    Expects one argument: either a pom.xml file, whose repositories are printed
    in a readable ID/URL/Name layout, or a directory, which is handed to
    scan_directory. Exits with status 1 when the argument is missing or is
    neither a file nor a directory.
    """
    if len(sys.argv) < 2:
        print("Usage: python maven_repo_finder.py <pom.xml or directory>")
        print("\nExamples:")
        print(" python maven_repo_finder.py pom.xml")
        print(" python maven_repo_finder.py /path/to/maven/project")
        sys.exit(1)

    target = sys.argv[1]
    path = Path(target)

    if not path.is_file():
        if path.is_dir():
            # Directory scan
            scan_directory(path)
            return
        print(f"Error: {target} is not a valid file or directory", file=sys.stderr)
        sys.exit(1)

    # Single file
    results = find_repositories_and_dependencies(path)
    if not results:
        return

    def print_section(title, key):
        """Print one heading followed by the ID, URL and name of each entry."""
        entries = results[key]
        print(f"\n{title} ({len(entries)}):")
        for repo in entries:
            print(f" ID: {repo.get('id', 'N/A')}")
            print(f" URL: {repo.get('url', 'N/A')}")
            print(f" Name: {repo.get('name', 'N/A')}")
            print()

    print(f"\n=== Results for {path} ===")
    print_section("Repositories", 'repositories')
    print_section("Plugin Repositories", 'pluginRepositories')
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment