Skip to content

Instantly share code, notes, and snippets.

@monperrus
Created February 5, 2026 11:14
Show Gist options
  • Select an option

  • Save monperrus/bec6b9e3e7140c8ee978f7b5d6e72918 to your computer and use it in GitHub Desktop.

Select an option

Save monperrus/bec6b9e3e7140c8ee978f7b5d6e72918 to your computer and use it in GitHub Desktop.
Script to identify all <repository> and <pluginRepository> in a Maven dependency tree.
#!/usr/bin/env python3
"""
Script to identify all <repository> and <pluginRepository> in a Maven dependency tree.
Usage:
python maven_repo_finder.py <pom.xml or directory>
Author: Martin Monperrus
"""
import xml.etree.ElementTree as ET
import sys
from pathlib import Path
import tempfile
import os
import tempfile
import subprocess
import json
def _pom_child_text(element, tag, ns):
    """Return the trimmed text of the direct child *tag* of *element*.

    The un-namespaced tag is tried first, then the POM-namespaced one.
    Returns None when neither lookup yields any text.
    """
    text = element.findtext(tag) or element.findtext(f'mvn:{tag}', namespaces=ns)
    return text.strip() if text else text


def _resolve_project_placeholders(value, project_properties):
    """Replace ``${project.*}`` placeholders in *value* with known project values.

    Placeholders whose replacement is unknown (None or empty) are left as-is.
    """
    if not value:
        return value
    for placeholder, replacement in project_properties.items():
        if replacement and placeholder in value:
            value = value.replace(placeholder, replacement)
    return value


def find_repositories_and_dependencies(xml_file, visited=None):
    """
    Parse a Maven pom.xml file, extract repositories, and recursively follow dependencies.

    Each dependency with a complete groupId/artifactId/version is looked up in the
    local ``~/.m2/repository``; when its ``.pom`` file exists there it is processed
    recursively and its repositories are merged into the result.

    Args:
        xml_file: Path to the pom.xml (or .pom) file.
        visited: Set of already visited ``group:artifact:version`` keys, used to
            avoid cycles. A fresh set is created when omitted.

    Returns:
        The dictionary produced by ``find_repositories`` for this file, with an
        added 'dependencies' list and with the 'repositories' /
        'pluginRepositories' of locally available dependencies appended (each
        tagged with a 'source_file' key). Returns None when the artifact
        described by *xml_file* was already visited.

    Raises:
        Exception: any error raised while processing is reported on stderr and
            then re-raised unchanged.
    """
    if visited is None:
        visited = set()
    try:
        root = ET.parse(xml_file).getroot()
        # Maven namespace
        ns = {'mvn': 'http://maven.apache.org/POM/4.0.0'}

        # Explicit None checks: the truth value of an Element depends on its
        # child count (and is deprecated), so `find(a) or find(b)` is unsafe.
        parent = root.find('parent')
        if parent is None:
            parent = root.find('mvn:parent', namespaces=ns)

        def inherited_text(tag):
            # groupId and version may be omitted and inherited from <parent>.
            value = _pom_child_text(root, tag, ns)
            if not value and parent is not None:
                value = _pom_child_text(parent, tag, ns)
            return value

        # Get artifact coordinates to track visited
        group_id = inherited_text('groupId')
        artifact_id = _pom_child_text(root, 'artifactId', ns)
        version = inherited_text('version')
        artifact_key = f"{group_id}:{artifact_id}:{version}"
        if artifact_key in visited:
            return None
        visited.add(artifact_key)

        project_properties = {
            '${project.groupId}': group_id,
            '${project.version}': version,
        }

        # Find dependencies
        dependencies = root.findall('.//dependency') or root.findall('.//mvn:dependency', ns)
        dep_results = []
        # (nested result, pom path) for every dependency resolved from ~/.m2
        nested_results = []
        for dep in dependencies:
            dep_group = _pom_child_text(dep, 'groupId', ns)
            dep_artifact = _pom_child_text(dep, 'artifactId', ns)
            dep_version = _pom_child_text(dep, 'version', ns)

            # Handle version ranges like [1.0,2.0), [1.0] or (,1.0]: use the
            # first bound that is actually given.
            if dep_version and dep_version.startswith(('[', '(')):
                bounds = (bound.strip('[]() ') for bound in dep_version.split(','))
                dep_version = next((bound for bound in bounds if bound), '')

            dep_group = _resolve_project_placeholders(dep_group, project_properties)
            dep_version = _resolve_project_placeholders(dep_version, project_properties)

            if not (dep_group and dep_artifact and dep_version):
                continue

            dep_info = {
                'groupId': dep_group,
                'artifactId': dep_artifact,
                'version': dep_version
            }
            dep_results.append(dep_info)

            # Try to find the dependency's pom in the local .m2 repository
            m2_repo = Path.home() / '.m2' / 'repository'
            group_path = dep_group.replace('.', '/')
            local_pom = m2_repo / group_path / dep_artifact / dep_version / f"{dep_artifact}-{dep_version}.pom"
            if local_pom.exists():
                nested = find_repositories_and_dependencies(str(local_pom), visited)
                dep_info['local_repositories'] = nested
                if nested:
                    nested_results.append((nested, str(local_pom)))

        data = find_repositories(xml_file)
        data['dependencies'] = dep_results

        # Merge repositories from dependencies, keeping each kind in its own list.
        for nested, pom_path in nested_results:
            for key in ('repositories', 'pluginRepositories'):
                for repo in nested.get(key, []):
                    # Entries merged up from deeper levels already carry the
                    # POM that declared them; do not overwrite it.
                    repo.setdefault('source_file', pom_path)
                    data[key].append(repo)
        return data
    except Exception as e:
        print(f"Error processing dependencies in {xml_file}: {e}", file=sys.stderr)
        raise
def _repository_entry(element):
    """Flatten one <repository>/<pluginRepository> element into a dict.

    Keys are the child tag names without namespace; values are the children's
    text with surrounding whitespace removed (None when a child has no text).
    """
    entry = {}
    for child in element:
        tag = child.tag.split('}')[-1]  # Remove namespace if present
        entry[tag] = child.text.strip() if child.text is not None else None
    return entry


def find_repositories(xml_file):
    """
    Parse a Maven pom.xml file and extract all repository and pluginRepository entries.

    Elements are matched by local tag name anywhere below the root, so the
    file may use no XML namespace or any POM namespace.

    Args:
        xml_file: Path to the pom.xml file.

    Returns:
        Dictionary with 'repositories' and 'pluginRepositories' lists, each
        holding one dict per matching element, in document order. On any
        error a message is printed to stderr and the results collected so far
        (possibly empty lists) are returned; no exception is raised.
    """
    results = {
        'repositories': [],
        'pluginRepositories': []
    }
    # Local tag name -> key of the result list it belongs to.
    result_key_by_tag = {
        'repository': 'repositories',
        'pluginRepository': 'pluginRepositories',
    }
    try:
        root = ET.parse(xml_file).getroot()
        for element in root.iter():
            # Only descendants are of interest, and only real elements have a
            # string tag.
            if element is root or not isinstance(element.tag, str):
                continue
            key = result_key_by_tag.get(element.tag.split('}')[-1])
            if key is not None:
                results[key].append(_repository_entry(element))
        return results
    except ET.ParseError as e:
        print(f"Error parsing {xml_file}: {e}", file=sys.stderr)
        return results
    except Exception as e:
        print(f"Error processing {xml_file}: {e}", file=sys.stderr)
        return results
def scan_directory(directory):
    """
    Recursively scan directory for pom.xml files and extract repositories.

    For every pom.xml found (except those below a 'resources' folder),
    ``mvn dependency:go-offline`` is run first, as a best effort, so that
    dependency POMs are available locally; the file is then analysed with
    ``find_repositories_and_dependencies`` and the repositories found are
    printed as JSON, one per line.

    Args:
        directory: Root directory to scan.

    Returns:
        Dictionary mapping each processed pom.xml path (as a string) to its
        result dictionary. Files that fail to process are skipped.
    """
    all_results = {}
    for pom_file in Path(directory).rglob('pom.xml'):
        # Skip pom.xml files in resources folders
        if 'resources' in pom_file.parts:
            continue

        # Run mvn dependency:go-offline to fetch dependencies locally
        try:
            print(f" Fetching dependencies for {pom_file}...")
            # Maven runs with cwd set to the POM's folder, so a relative -f
            # path would be resolved against the wrong directory; pass an
            # absolute one instead.
            absolute_pom = Path.cwd() / pom_file
            completed = subprocess.run(
                ['mvn', 'dependency:go-offline', '-f', str(absolute_pom)],
                cwd=pom_file.parent,
                capture_output=True,
                timeout=300
            )
            if completed.returncode != 0:
                print(
                    f" Warning: mvn exited with status {completed.returncode} for {pom_file}",
                    file=sys.stderr,
                )
        except subprocess.TimeoutExpired:
            print(f" Warning: Timeout fetching dependencies for {pom_file}", file=sys.stderr)
        except Exception as e:
            print(f" Warning: Could not fetch dependencies for {pom_file}: {e}", file=sys.stderr)

        print(f"\nProcessing: {pom_file}")
        try:
            results = find_repositories_and_dependencies(pom_file)
        except Exception:
            # The callee has already reported the error on stderr; one broken
            # POM must not abort the scan of the remaining files.
            print(f" Warning: Skipping {pom_file}", file=sys.stderr)
            continue
        if not results:
            continue

        all_results[str(pom_file)] = results
        if results['repositories']:
            print(f" Found {len(results['repositories'])} repositories:")
            for repo in results['repositories']:
                print(json.dumps(repo))
        if results.get('pluginRepositories'):
            print(f" Found {len(results['pluginRepositories'])} plugin repositories:")
            for repo in results['pluginRepositories']:
                print(json.dumps(repo))
    return all_results
def main():
    """Command-line entry point.

    Expects one argument: a pom.xml file or a directory. A file is analysed
    and its repositories and plugin repositories are printed; a directory is
    handed to ``scan_directory``. Exits with status 1 when the argument is
    missing or is neither a file nor a directory.
    """
    if len(sys.argv) < 2:
        for usage_line in (
            "Usage: python maven_repo_finder.py <pom.xml or directory>",
            "\nExamples:",
            " python maven_repo_finder.py pom.xml",
            " python maven_repo_finder.py /path/to/maven/project",
        ):
            print(usage_line)
        sys.exit(1)

    target = sys.argv[1]
    path = Path(target)

    if path.is_dir():
        # Directory scan
        scan_directory(path)
        return

    if not path.is_file():
        print(f"Error: {target} is not a valid file or directory", file=sys.stderr)
        sys.exit(1)

    # Single file
    results = find_repositories_and_dependencies(path)
    if not results:
        return

    print(f"\n=== Results for {path} ===")

    print(f"\nRepositories ({len(results['repositories'])}):")
    for entry in results['repositories']:
        # The trailing empty string yields the blank separator line.
        print(
            f" ID: {entry.get('id', 'N/A')}",
            f" URL: {entry.get('url', 'N/A')}",
            f" Name: {entry.get('name', 'N/A')}",
            "",
            sep="\n",
        )

    print(f"\nPlugin Repositories ({len(results['pluginRepositories'])}):")
    for entry in results['pluginRepositories']:
        print(
            f" ID: {entry.get('id', 'N/A')}",
            f" URL: {entry.get('url', 'N/A')}",
            f" Name: {entry.get('name', 'N/A')}",
            "",
            sep="\n",
        )


# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment