Skip to content

Instantly share code, notes, and snippets.

@X-Cotang
Last active August 26, 2025 03:22
Show Gist options
  • Select an option

  • Save X-Cotang/5640d2d446a42c3d5f0a90781503de18 to your computer and use it in GitHub Desktop.

Select an option

Save X-Cotang/5640d2d446a42c3d5f0a90781503de18 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
decompile_merge_vineflower.py
Tool to decompile multiple jar files
Windows-friendly tool to:
1) Decompile many .jar files using Vineflower (or CFR).
2) Auto-extract Vineflower's source-jar outputs.
3) Merge all .java files into ONE unified package tree (com/org/...),
similar to what jadx shows.
Requirements:
- Python 3.8+
- Java installed and on PATH (or pass --java "C:\\Path\\to\\java.exe")
Examples:
# Vineflower (recommended by user)
python decompile_merge_vineflower.py ^
-i "C:\\Users\\x\\Desktop\\web\\WEB-INF\\lib" ^
-o "C:\\Users\\x\\Desktop\\out_merged" ^
-d vineflower ^
--vineflower-jar "C:\\Users\\x\\Desktop\\tool\\vineflower.jar" ^
-j 4 --merge-mode if-changed
# CFR alternative
python decompile_merge_vineflower.py ^
-i "C:\\libs\\*.jar" ^
-o "C:\\src-out-merged" ^
-d cfr --cfr-jar "C:\\tools\\cfr.jar" -j 8 --clean-output
"""
import argparse
import concurrent.futures
import subprocess
import sys
import os
import glob
import shutil
import zipfile
from pathlib import Path
from typing import List, Optional, Tuple
def collect_jars(input_path: str) -> List[Path]:
    """Return the sorted, de-duplicated archive files selected by *input_path*.

    Args:
        input_path: Either a directory, which is searched recursively for
            ``*.jar`` entries, or a glob pattern (``**`` is honoured), which
            is expanded as given.

    Returns:
        Resolved (absolute) paths of the matching regular files, sorted and
        with duplicates removed. Empty when nothing matches.
    """
    base = Path(input_path)
    if base.is_dir():
        candidates = base.rglob("*.jar")
    else:
        candidates = (Path(hit) for hit in glob.glob(input_path, recursive=True))
    # Keep regular files only: a directory that merely matches the pattern
    # (for example a folder named "x.jar") is not an archive to decompile.
    return sorted({c.resolve() for c in candidates if c.is_file()})
def run_cmd(cmd: List[str]) -> Tuple[int, str, str]:
    """Run *cmd* without a shell and capture its output as text.

    Args:
        cmd: Program and arguments, passed to the OS as an argv list.

    Returns:
        ``(returncode, stdout, stderr)`` of the finished process.
    """
    proc = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        # Decompiler output may contain bytes that are invalid in the locale
        # encoding; substitute them instead of raising UnicodeDecodeError.
        errors="replace",
        shell=False,
    )
    return proc.returncode, proc.stdout, proc.stderr
def build_cmd_vineflower(java_bin: str, vf_jar: Path, in_jar: Path, dest_dir: Path) -> List[str]:
    """Assemble the argv list that runs Vineflower on a single input jar.

    The command has the shape
    ``<java> -jar <vineflower.jar> [options] <in> <out>``.
    """
    command = [java_bin, "-jar", str(vf_jar)]
    # Fernflower-style switches; not strictly required for basic output.
    command += ["-hes=1", "-hdc=0", "-dgs=1", "-rbr=1", "-asc=1", "-log=INFO"]
    command += [str(in_jar), str(dest_dir)]
    return command
def build_cmd_cfr(java_bin: str, cfr_jar: Path, in_jar: Path, dest_dir: Path) -> List[str]:
    """Assemble the argv list that runs CFR on a single input jar.

    CFR is pointed at *dest_dir* through ``--outputdir`` and writes plain
    ``.java`` files there directly.
    """
    launcher = [java_bin, "-jar", str(cfr_jar)]
    target = [str(in_jar), "--outputdir", str(dest_dir)]
    switches = [
        "--decodelambdas", "true",
        "--decodeenumswitch", "true",
        "--silent", "true",
    ]
    return launcher + target + switches
def try_extract_source_jar_into(dest_dir: Path) -> Optional[Path]:
    """Unpack the newest ``*.jar`` found directly inside *dest_dir*.

    Vineflower/fernflower usually emits a JAR containing the ``.java``
    sources into its output directory. When such a file is present it is
    extracted into ``dest_dir / "src"`` and then deleted.

    Args:
        dest_dir: Directory the decompiler wrote its output to.

    Returns:
        The extraction directory, or ``None`` when *dest_dir* holds no
        ``*.jar`` or the newest one is not a valid zip archive.
    """
    candidates = list(dest_dir.glob("*.jar"))
    if not candidates:
        return None

    # Only the most recently modified archive is considered.
    src_jar = max(candidates, key=lambda p: p.stat().st_mtime)
    if not zipfile.is_zipfile(src_jar):
        return None

    extract_to = dest_dir / "src"
    extract_to.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(src_jar, "r") as zf:
        zf.extractall(extract_to)

    try:
        src_jar.unlink()
    except OSError:
        # Best-effort cleanup: a locked or already-removed archive must not
        # fail an extraction that has already succeeded.
        pass
    return extract_to
def pick_best_source_root(staging_dir: Path) -> Path:
    """Choose the directory that relative source paths are computed from.

    Returns ``staging_dir / "src"`` when that entry exists (the layout left
    behind after a source jar was extracted); otherwise returns *staging_dir*
    itself.
    """
    extracted = staging_dir / "src"
    return extracted if extracted.exists() else staging_dir
def _files_identical(a: Path, b: Path, chunk_size: int = 64 * 1024) -> bool:
    """Return True when *a* and *b* contain exactly the same bytes."""
    if a.stat().st_size != b.stat().st_size:
        return False
    with open(a, "rb") as fa, open(b, "rb") as fb:
        while True:
            block = fa.read(chunk_size)
            if block != fb.read(chunk_size):
                return False
            if not block:
                # Both streams ended together with no difference found.
                return True


def merge_copy(src_root: Path, out_root: Path, mode: str = "if-changed") -> Tuple[int, int, int]:
    """Copy every ``*.java`` below *src_root* into *out_root*, keeping relative paths.

    Args:
        src_root: Directory searched recursively for ``*.java`` files.
        out_root: Destination root; parent directories are created as needed.
        mode: Policy applied when the destination file already exists:

            - ``"overwrite"``: always replace the destination.
            - ``"skip"``: never replace an existing destination.
            - ``"if-changed"``: replace only when the contents differ (default).
            - ``"keep-both"``: leave the destination alone and write the new
              file next to it as ``<name>__dupN.java`` using the first free N.

            Any other value behaves like ``"overwrite"``.

    Returns:
        ``(copied, skipped, overwritten)`` counters. Files written under
        ``"keep-both"`` count as copied.
    """
    copied = skipped = overwritten = 0

    for src in src_root.rglob("*.java"):
        dst = out_root / src.relative_to(src_root)
        dst.parent.mkdir(parents=True, exist_ok=True)

        if not dst.exists():
            shutil.copy2(src, dst)
            copied += 1
            continue

        if mode == "skip":
            skipped += 1
            continue

        if mode == "keep-both":
            k = 1
            while True:
                alt = dst.with_name(f"{dst.stem}__dup{k}{dst.suffix}")
                if not alt.exists():
                    break
                k += 1
            shutil.copy2(src, alt)
            copied += 1
            continue

        if mode == "if-changed":
            # Compare the complete contents: equal size plus an equal leading
            # chunk does not prove that two files are the same.
            try:
                unchanged = _files_identical(src, dst)
            except OSError:
                # If the comparison cannot be made, fall back to overwriting.
                unchanged = False
            if unchanged:
                skipped += 1
                continue

        # Reached for "overwrite", for changed files under "if-changed",
        # and for unknown modes.
        shutil.copy2(src, dst)
        overwritten += 1

    return copied, skipped, overwritten
def process_one(jar: Path, stage_root: Path, out_root: Path, java_bin: str,
                decompiler: str, vf_jar: Optional[Path], cfr_jar: Optional[Path],
                merge_mode: str) -> str:
    """Decompile one jar into its own staging directory and merge the result.

    Args:
        jar: Archive to decompile.
        stage_root: Parent directory for per-jar staging directories.
        out_root: Unified output tree the ``.java`` files are merged into.
        java_bin: Java executable used to launch the decompiler.
        decompiler: ``"vineflower"`` or ``"cfr"``.
        vf_jar: Path to vineflower.jar (needed for ``"vineflower"``).
        cfr_jar: Path to cfr.jar (needed for ``"cfr"``).
        merge_mode: Conflict policy forwarded to ``merge_copy``.

    Returns:
        A one-entry, newline-terminated report. It begins with ``[VF] OK`` or
        ``[CFR] OK`` on success and with a ``MISSING``/``FAIL``/``[ERR]``
        marker otherwise.
    """
    import hashlib

    # Jars with the same file name in different folders must not share a
    # staging directory (workers run in parallel), so the name carries a
    # short, run-stable digest of the full path.
    digest = hashlib.sha256(os.fsencode(jar)).hexdigest()[:8]
    staging_dir = stage_root / f"{jar.stem}-{digest}"
    staging_dir.mkdir(parents=True, exist_ok=True)

    if decompiler == "vineflower":
        if not vf_jar or not vf_jar.exists():
            return f"[VF] MISSING vineflower.jar for {jar}\n"
        cmd = build_cmd_vineflower(java_bin, vf_jar, jar, staging_dir)
        code, out, err = run_cmd(cmd)
        if code != 0:
            return f"[VF] FAIL {jar} -> {staging_dir}\nSTDOUT:\n{out}\nSTDERR:\n{err}\n"
        # Unpack the source jar if one was emitted. The extraction target is
        # staging_dir / "src", which pick_best_source_root() then prefers.
        try_extract_source_jar_into(staging_dir)
        src_root = pick_best_source_root(staging_dir)
        c, s, o = merge_copy(src_root, out_root, mode=merge_mode)
        return f"[VF] OK {jar} -> merged ({c} copied, {o} overwritten, {s} skipped)\n"

    if decompiler == "cfr":
        if not cfr_jar or not cfr_jar.exists():
            return f"[CFR] MISSING cfr.jar for {jar}\n"
        # CFR writes plain .java files straight into a 'src' folder in staging.
        cfr_out = staging_dir / "src"
        cfr_out.mkdir(parents=True, exist_ok=True)
        cmd = build_cmd_cfr(java_bin, cfr_jar, jar, cfr_out)
        code, out, err = run_cmd(cmd)
        if code != 0:
            return f"[CFR] FAIL {jar} -> {cfr_out}\nSTDOUT:\n{out}\nSTDERR:\n{err}\n"
        c, s, o = merge_copy(cfr_out, out_root, mode=merge_mode)
        return f"[CFR] OK {jar} -> merged ({c} copied, {o} overwritten, {s} skipped)\n"

    return f"[ERR] Unknown decompiler: {decompiler}\n"
def _is_within(path: Path, ancestor: Path) -> bool:
    """Return True when *path* equals *ancestor* or lies beneath it."""
    # Path.is_relative_to() only exists from Python 3.9 on; relative_to()
    # gives the same answer on 3.8 as well.
    try:
        path.relative_to(ancestor)
    except ValueError:
        return False
    return True


def main(argv=None):
    """Command-line entry point: decompile every selected jar and merge the sources.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` when every jar was decompiled and merged, ``1`` when at least
        one jar failed, ``2`` when the input selected no jar at all.
    """
    parser = argparse.ArgumentParser(description="Decompile many JARs with Vineflower/CFR and merge into a unified package tree.")
    parser.add_argument("-i", "--input", required=True, help="Folder or glob (e.g., C:\\libs or C:\\libs\\*.jar)")
    parser.add_argument("-o", "--out", required=True, help="Unified output folder (merged package tree)")
    parser.add_argument("-d", "--decompiler", choices=["vineflower", "cfr"], default="vineflower")
    parser.add_argument("--vineflower-jar", help="Path to vineflower.jar")
    parser.add_argument("--cfr-jar", help="Path to cfr.jar")
    parser.add_argument("--stage-dir", help="Staging folder (default: <out>\\_stage)")
    parser.add_argument("--clean-output", action="store_true", help="Delete output folder before merging")
    parser.add_argument("--merge-mode", choices=["overwrite", "skip", "if-changed", "keep-both"], default="if-changed",
                        help="Conflict policy when the same .java path already exists in output")
    parser.add_argument("-j", "--jobs", type=int, default=os.cpu_count() or 4, help="Parallel workers")
    parser.add_argument("--java", default="java", help="Path to java.exe, if not on PATH")
    args = parser.parse_args(argv)

    out_root = Path(args.out).resolve()
    stage_root = Path(args.stage_dir).resolve() if args.stage_dir else (out_root / "_stage")
    stage_root.mkdir(parents=True, exist_ok=True)

    if args.clean_output and out_root.exists():
        if _is_within(stage_root, out_root):
            # The staging folder lives inside the output folder: empty the
            # output folder but leave the staging folder in place.
            for child in out_root.iterdir():
                if child.resolve() == stage_root.resolve():
                    continue
                if child.is_dir():
                    shutil.rmtree(child, ignore_errors=True)
                else:
                    try:
                        child.unlink()
                    except OSError:
                        # Best-effort cleanup, mirroring ignore_errors above.
                        pass
        else:
            shutil.rmtree(out_root, ignore_errors=True)
    out_root.mkdir(parents=True, exist_ok=True)

    jars = collect_jars(args.input)
    if not jars:
        print(f"No .jar found under: {args.input}", file=sys.stderr)
        return 2

    vf_jar = Path(args.vineflower_jar).resolve() if args.vineflower_jar else None
    cfr_jar = Path(args.cfr_jar).resolve() if args.cfr_jar else None
    print(f"Found {len(jars)} JAR(s). Decompiler={args.decompiler}, MergeMode={args.merge_mode}, Jobs={args.jobs}")

    results: List[str] = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max(1, args.jobs)) as ex:
        futs = [
            ex.submit(process_one, jar, stage_root, out_root, args.java,
                      args.decompiler, vf_jar, cfr_jar, args.merge_mode)
            for jar in jars
        ]
        for fut in concurrent.futures.as_completed(futs):
            try:
                msg = fut.result()
            except Exception as e:
                # Per-job boundary: report the failure and keep going. The
                # trailing newline matters because messages print with end="".
                msg = f"[ERR] {e}\n"
            results.append(msg)
            print(msg, end="")
    print("\nDone.")

    # Match the status marker at the start of the message; a plain substring
    # test would also accept failures whose jar path happens to contain "OK".
    ok_prefixes = ("[VF] OK ", "[CFR] OK ")
    return 0 if all(r.startswith(ok_prefixes) for r in results) else 1
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit status.
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment