Skip to content

Instantly share code, notes, and snippets.

@mysiki
Last active November 10, 2025 10:46
Show Gist options
  • Select an option

  • Save mysiki/e4449bc239ad1b4cbae62cd99fae7e36 to your computer and use it in GitHub Desktop.

Select an option

Save mysiki/e4449bc239ad1b4cbae62cd99fae7e36 to your computer and use it in GitHub Desktop.
retro gamelist.xml cleanup duplicated
#!/usr/bin/env python3
"""
Duplicate Game Cleaner for GameList XML files
This script identifies and removes duplicate games based on:
1. If different languages: Keep French (fr) game
2. If same language but different formats: Prefer .pc, then .zip, then .m3u playlists (subfolder layout first)
Duplicates are identified by the same game ID in the XML file.
By default, files are moved to a .trash folder in the same directory.
Use --permanent-delete flag to permanently delete files instead.
"""
import xml.etree.ElementTree as ET
import os
import sys
import argparse
import shutil
from pathlib import Path
from typing import Dict, List, Tuple, Optional
class GameEntry:
    """One ``<game>`` element of a gamelist XML file.

    Attributes:
        element: The underlying ElementTree element (used later to remove
            the entry from the tree).
        xml_file_dir: Directory of the XML file; relative ROM paths are
            resolved against it.
        id: Value of the ``id`` attribute, or ``None`` when absent.
        path: Text of the ``<path>`` child ('' when missing).
        name: Text of the ``<name>`` child ('' when missing).
        lang: Text of the ``<lang>`` child ('' when missing).
        file_extension: Lower-cased suffix of ``path`` ('' when no path).
        full_path: ``path`` resolved against ``xml_file_dir``, or ``None``
            when the entry has no path.
    """

    # Whole language tokens that identify French. Matching whole tokens
    # (instead of the substring "fr") keeps codes such as "afr" or "fry"
    # from being mistaken for French.
    _FRENCH_TOKENS = frozenset({'fr', 'fra', 'fre', 'french', 'francais', 'français'})

    def __init__(self, game_element, xml_file_dir):
        self.element = game_element
        self.xml_file_dir = xml_file_dir
        # Extract game information
        self.id = game_element.get('id')
        self.path = self._get_text('path')
        self.name = self._get_text('name')
        self.lang = self._get_text('lang', '')
        # Determine file extension and full path
        self.file_extension = Path(self.path).suffix.lower() if self.path else ''
        self.full_path = self._get_full_path()

    def _get_text(self, tag: str, default: str = '') -> str:
        """Return the stripped text of child ``tag``, or ``default`` if missing/empty."""
        elem = self.element.find(tag)
        if elem is None or not elem.text:
            return default
        return elem.text.strip()

    def _get_full_path(self) -> Optional[str]:
        """Return the game file path resolved against the XML directory.

        Returns ``None`` when the entry has no path. Absolute paths are
        returned unchanged.
        """
        if not self.path:
            return None
        # Handle relative paths (starting with ./)
        if self.path.startswith('./'):
            return os.path.join(self.xml_file_dir, self.path[2:])
        if not os.path.isabs(self.path):
            return os.path.join(self.xml_file_dir, self.path)
        return self.path

    def has_french(self) -> bool:
        """Return True if the ``lang`` field lists French.

        The field is split on every non-letter character, so values such as
        ``"en,fr"``, ``"EN, FR"`` and ``"fr-FR"`` all match, while codes that
        merely contain the letters "fr" (e.g. ``"afr"``) do not.
        """
        letters_only = ''.join(ch if ch.isalpha() else ' ' for ch in self.lang.lower())
        return any(token in self._FRENCH_TOKENS for token in letters_only.split())

    def is_zip(self) -> bool:
        """Return True if the game file is a .zip file."""
        return self.file_extension == '.zip'

    def is_pc(self) -> bool:
        """Return True if the game file is a .pc file (DOS games)."""
        return self.file_extension == '.pc'

    def is_m3u(self) -> bool:
        """Return True if the game file is an .m3u playlist."""
        return self.file_extension == '.m3u'

    def __str__(self):
        return f"Game(id={self.id}, name={self.name}, lang={self.lang}, path={self.path}, ext={self.file_extension})"

    # Same text for repr() so entries are readable inside lists and debuggers.
    __repr__ = __str__
class DuplicateGameCleaner:
    """Find duplicate ``<game>`` entries in a gamelist XML file and remove the extras.

    Entries sharing the same ``id`` attribute are duplicates. One entry per
    group is kept (French first when languages differ, then by file format);
    the files of the other entries are moved to a ``.trash`` folder next to
    them (or permanently deleted) and their entries are removed from the XML.
    In dry-run mode nothing on disk is modified; actions are only printed.
    """

    def __init__(self, xml_file: str, dry_run: bool = True, permanent_delete: bool = False):
        """Store the configuration; no file is read until ``parse_xml``.

        Args:
            xml_file: Path to the gamelist XML file.
            dry_run: When True, only report what would be done.
            permanent_delete: When True, delete files instead of moving
                them to a ``.trash`` folder.
        """
        self.xml_file = xml_file
        self.xml_file_dir = os.path.dirname(os.path.abspath(xml_file))
        self.dry_run = dry_run
        self.permanent_delete = permanent_delete
        self.games_by_id: Dict[str, List["GameEntry"]] = {}
        self.tree = None
        self.root = None

    # ------------------------------------------------------------------
    # Parsing and selection
    # ------------------------------------------------------------------

    def parse_xml(self):
        """Parse the XML file and group its ``<game>`` entries by ID.

        Entries without an ``id`` attribute are ignored. Exits the process
        with status 1 when the file is missing or is not well-formed XML.
        """
        # Start from a clean slate so a second call does not double every entry.
        self.games_by_id = {}
        try:
            self.tree = ET.parse(self.xml_file)
        except ET.ParseError as e:
            print(f"Error parsing XML file: {e}")
            sys.exit(1)
        except FileNotFoundError:
            print(f"XML file not found: {self.xml_file}")
            sys.exit(1)
        self.root = self.tree.getroot()
        for game_elem in self.root.findall('game'):
            game = GameEntry(game_elem, self.xml_file_dir)
            if game.id:
                self.games_by_id.setdefault(game.id, []).append(game)

    def find_duplicates(self) -> Dict[str, List["GameEntry"]]:
        """Return only the ID groups that contain more than one entry."""
        return {game_id: games for game_id, games in self.games_by_id.items() if len(games) > 1}

    def select_game_to_keep(self, games: List["GameEntry"]) -> "GameEntry":
        """Pick the entry to keep from a group of duplicates.

        1. If the entries have different languages and at least one is
           French, only the French entries remain candidates.
        2. Among the candidates the format preference of
           ``_select_by_format_preference`` decides.
        """
        if len(games) == 1:
            return games[0]
        languages = {game.lang for game in games}
        if len(languages) > 1:
            # Different languages - prefer French when available
            french_games = [game for game in games if game.has_french()]
            if french_games:
                return self._select_by_format_preference(french_games)
        # Same language, or no French entry: format preference over all
        return self._select_by_format_preference(games)

    def _select_by_format_preference(self, games: List["GameEntry"]) -> "GameEntry":
        """Pick by format: .pc > .zip > .m3u (subfolder) > .m3u (same dir) > first entry."""
        # First preference: .pc files (for DOS games)
        for game in games:
            if game.is_pc():
                return game
        # Second preference: .zip files
        for game in games:
            if game.is_zip():
                return game
        # Third preference: .m3u playlists, those using subfolders first
        m3u_games = [game for game in games if game.is_m3u()]
        if m3u_games:
            for game in m3u_games:
                if self.m3u_uses_subfolders(game):
                    return game
            return m3u_games[0]
        # Fallback: return first game
        return games[0]

    # ------------------------------------------------------------------
    # Trash handling
    # ------------------------------------------------------------------

    def create_trash_folder(self, file_path: str) -> str:
        """Create (if needed) and return the ``.trash`` folder next to ``file_path``."""
        trash_dir = os.path.join(os.path.dirname(file_path), '.trash')
        os.makedirs(trash_dir, exist_ok=True)
        return trash_dir

    def get_unique_trash_path(self, file_path: str, trash_dir: str) -> str:
        """Return a destination in ``trash_dir`` that does not exist yet.

        The original file name is used when free; otherwise ``_1``, ``_2``,
        ... is inserted before the extension until an unused name is found.
        """
        filename = os.path.basename(file_path)
        base_name, extension = os.path.splitext(filename)
        trash_path = os.path.join(trash_dir, filename)
        counter = 1
        while os.path.exists(trash_path):
            trash_path = os.path.join(trash_dir, f"{base_name}_{counter}{extension}")
            counter += 1
        return trash_path

    # ------------------------------------------------------------------
    # .m3u playlists
    # ------------------------------------------------------------------

    def read_m3u_files(self, m3u_path: str) -> List[str]:
        """Return the normalized paths of the disc files listed in an .m3u playlist.

        Blank lines and ``#`` comment lines are skipped. Relative entries
        are resolved against the playlist's directory. On a read error a
        warning is printed and the entries collected so far are returned.
        """
        disc_files = []
        m3u_dir = os.path.dirname(m3u_path)
        try:
            # utf-8-sig strips a leading BOM, which would otherwise become
            # part of the first disc path and make that file "not found".
            with open(m3u_path, 'r', encoding='utf-8-sig') as f:
                for line in f:
                    line = line.strip()
                    # Skip empty lines and comments
                    if not line or line.startswith('#'):
                        continue
                    # Handle relative paths
                    if not os.path.isabs(line):
                        line = os.path.join(m3u_dir, line)
                    disc_files.append(os.path.normpath(line))
        except (FileNotFoundError, UnicodeDecodeError, OSError) as e:
            print(f" ⚠️ Error reading .m3u file {m3u_path}: {e}")
        return disc_files

    def is_m3u_file(self, file_path: str) -> bool:
        """Return True if ``file_path`` ends with ``.m3u`` (case-insensitive)."""
        return file_path.lower().endswith('.m3u')

    def m3u_uses_subfolders(self, game: "GameEntry") -> bool:
        """Return True if the game's .m3u references a file in a subfolder.

        A disc counts only when it lives *below* the playlist's directory;
        entries that escape upward (``../disc.cue``) are not subfolders.
        """
        if not game.is_m3u() or not game.full_path:
            return False
        m3u_dir = os.path.dirname(game.full_path)
        for disc_file in self.read_m3u_files(game.full_path):
            try:
                rel_path = os.path.relpath(disc_file, m3u_dir)
            except ValueError:
                # Can't get relative path (e.g. other drive), skip this file
                continue
            first_component = rel_path.split(os.sep, 1)[0]
            if os.path.dirname(rel_path) and first_component != os.pardir:
                return True
        return False

    # ------------------------------------------------------------------
    # File and XML removal
    # ------------------------------------------------------------------

    @staticmethod
    def _path_key(path: str) -> str:
        """Return a normalized form of ``path`` suitable for equality checks."""
        return os.path.normcase(os.path.normpath(path))

    def _files_in_use(self, game: "GameEntry") -> set:
        """Return the path keys of every file the given (kept) game relies on.

        That is the game file itself plus, for an .m3u playlist, the disc
        files it lists.
        """
        if not game.full_path:
            return set()
        in_use = {self._path_key(game.full_path)}
        if game.is_m3u() and os.path.isfile(game.full_path):
            in_use.update(self._path_key(disc) for disc in self.read_m3u_files(game.full_path))
        return in_use

    def _file_label(self, current_file: str, main_file: str) -> str:
        """Return the suffix shown after a path in the progress messages."""
        if current_file != main_file:
            return "(.m3u disc file)"
        return "(.m3u playlist)" if self.is_m3u_file(current_file) else ""

    def delete_file_safely(self, file_path: str, protected_paths=None) -> bool:
        """Delete a game file (or move it to trash), including .m3u disc files.

        Args:
            file_path: File to remove. For an .m3u playlist the disc files
                it references are removed as well.
            protected_paths: Optional iterable of paths that must never be
                touched (files still used by the entry being kept). Any
                candidate matching one of them is skipped with a notice.

        Returns:
            True when every non-protected file was processed without error
            (in dry-run mode: always, once the file exists); False when the
            file is missing or any operation failed.
        """
        if not file_path or not os.path.exists(file_path):
            print(f" ⚠️ File not found: {file_path}")
            return False

        protected = {self._path_key(p) for p in (protected_paths or ()) if p}
        candidates = [file_path]
        seen = {self._path_key(file_path)}

        # If this is an .m3u file, also get the disc files it references
        if self.is_m3u_file(file_path):
            disc_files = self.read_m3u_files(file_path)
            if disc_files:
                print(f" 📀 .m3u file contains {len(disc_files)} disc files:")
                for disc_file in disc_files:
                    print(f" - {disc_file}")
                    key = self._path_key(disc_file)
                    if key in seen:
                        # Listed twice (or the playlist lists itself)
                        continue
                    if key in protected or os.path.exists(disc_file):
                        candidates.append(disc_file)
                        seen.add(key)
                    else:
                        print(f" ⚠️ Disc file not found: {disc_file}")

        success = True
        for current_file in candidates:
            file_type = self._file_label(current_file, file_path)
            if self._path_key(current_file) in protected:
                # Removing it would break the game we decided to keep.
                print(f" 🔒 Kept (still used by the kept game): {current_file} {file_type}")
                continue
            try:
                if self.dry_run:
                    action = "permanently delete" if self.permanent_delete else "move to trash"
                    print(f" 🔍 [DRY RUN] Would {action}: {current_file} {file_type}")
                elif self.permanent_delete:
                    os.remove(current_file)
                    print(f" ✅ Permanently deleted: {current_file} {file_type}")
                else:
                    trash_dir = self.create_trash_folder(current_file)
                    trash_path = self.get_unique_trash_path(current_file, trash_dir)
                    shutil.move(current_file, trash_path)
                    print(f" 🗑️ Moved to trash: {current_file} → {trash_path} {file_type}")
            except (OSError, shutil.Error) as e:
                action = "deleting" if self.permanent_delete else "moving to trash"
                print(f" ❌ Error {action} {current_file}: {e}")
                success = False
        return success

    def remove_game_from_xml(self, game_to_remove: "GameEntry"):
        """Remove the game's element from the XML root; return True on success."""
        try:
            self.root.remove(game_to_remove.element)
        except ValueError:
            print(f" ⚠️ Game element not found in XML tree")
            return False
        return True

    def _update_xml(self, games_to_remove) -> int:
        """Back up the XML file, drop the given entries and save; return entries removed."""
        print(f"\n📝 Updating XML file...")
        # Copy the file as it is on disk *before* anything is rewritten, so
        # the backup really holds the original content.
        backup_file = f"{self.xml_file}.backup"
        shutil.copy2(self.xml_file, backup_file)
        print(f"💾 Backup created: {backup_file}")
        removed_count = sum(1 for game in games_to_remove if self.remove_game_from_xml(game))
        self.tree.write(self.xml_file, encoding='utf-8', xml_declaration=True)
        print(f"✅ Updated XML file: {self.xml_file}")
        return removed_count

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------

    def _process_duplicate_group(self, game_id: str, games: List["GameEntry"]):
        """Handle one duplicate group; return ``(entries_to_remove, files_deleted)``."""
        print(f"\n🎯 Processing duplicates for Game ID: {game_id}")
        print(f" Found {len(games)} copies:")
        for i, game in enumerate(games, 1):
            lang_display = game.lang if game.lang else "unknown"
            print(f" {i}. {game.name} ({lang_display}) - {game.file_extension} - {game.path}")

        keep_game = self.select_game_to_keep(games)
        print(f" ✅ Keeping: {keep_game.name} ({keep_game.lang}) - {keep_game.file_extension} - {keep_game.path}")
        # Files the kept entry depends on must survive the cleanup.
        protected = self._files_in_use(keep_game)

        entries_to_remove = []
        files_deleted = 0
        for game in games:
            if game is keep_game:
                continue
            print(f" 🗑️ Marking for deletion: {game.name} ({game.lang}) - {game.file_extension} - {game.path}")
            shares_kept_file = bool(game.full_path) and self._path_key(game.full_path) in protected
            if self.delete_file_safely(game.full_path, protected_paths=protected):
                entries_to_remove.append(game)
                # An entry pointing at the kept file loses only its XML entry.
                if not shares_kept_file:
                    files_deleted += 1
        return entries_to_remove, files_deleted

    def _print_summary(self, total_games: int, duplicate_groups: int,
                       total_deleted: int, removed_entries: int):
        """Print the closing statistics for the run."""
        print("\n" + "=" * 60)
        print(f"📈 Summary:")
        print(f" • Total games processed: {total_games}")
        print(f" • Duplicate groups found: {duplicate_groups}")
        action = "permanently deleted" if self.permanent_delete else "moved to trash"
        if self.dry_run:
            print(f" • Games would be {action}: {total_deleted}")
            print(f" • XML entries would be removed: {removed_entries}")
            print(f"\n🚀 To actually perform the operation, run with --live flag")
            if not self.permanent_delete:
                print(f" Files will be moved to .trash folders (use --permanent-delete for permanent deletion)")
        else:
            print(f" • Games {action}: {total_deleted}")
            print(f" • XML entries removed: {removed_entries}")

    def clean_duplicates(self):
        """Run the whole cleanup: parse, pick keepers, remove files, update the XML."""
        print(f"🎮 Duplicate Game Cleaner")
        print(f"📁 XML file: {self.xml_file}")
        if self.dry_run:
            print(f"🧪 Mode: DRY RUN")
        else:
            deletion_mode = "PERMANENT DELETION" if self.permanent_delete else "MOVE TO TRASH"
            print(f"🧪 Mode: LIVE - {deletion_mode}")
        print("=" * 60)

        self.parse_xml()
        total_games = sum(len(games) for games in self.games_by_id.values())
        print(f"📊 Total games found: {total_games}")

        duplicates = self.find_duplicates()
        print(f"🔍 Duplicate game IDs found: {len(duplicates)}")
        if not duplicates:
            print("✨ No duplicates found! Your game library is clean.")
            return

        total_deleted = 0
        games_to_remove = []
        for game_id, games in duplicates.items():
            entries, deleted = self._process_duplicate_group(game_id, games)
            games_to_remove.extend(entries)
            total_deleted += deleted

        # Remove games from XML (only in live mode)
        removed_entries = len(games_to_remove)
        if games_to_remove:
            if self.dry_run:
                print(f"\n📝 [DRY RUN] Would update XML file...")
                print(f"🔍 [DRY RUN] Would remove {len(games_to_remove)} entries from XML")
            else:
                removed_entries = self._update_xml(games_to_remove)

        self._print_summary(total_games, len(duplicates), total_deleted, removed_entries)
def main():
    """Command-line entry point.

    Parses the arguments, asks for confirmation before a live permanent
    deletion, then runs the cleaner. The default mode is a dry run;
    ``--live`` performs the changes. ``--live`` and ``--dry-run`` are
    mutually exclusive, so an explicit ``--dry-run`` can never be silently
    overridden into a destructive run.

    Exits with status 1 when the XML file is missing, the run is
    interrupted or an unexpected error occurs; 0 when the user declines
    the confirmation prompt.
    """
    parser = argparse.ArgumentParser(description='Clean duplicate games from GameList XML')
    parser.add_argument('xml_file', help='Path to the GameList XML file')
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument('--live', action='store_true',
                      help='Actually process files (default is dry run)')
    mode.add_argument('--dry-run', action='store_true',
                      help='Only show what would be processed (default)')
    parser.add_argument('--permanent-delete', action='store_true',
                        help='Permanently delete files instead of moving to .trash folder')
    args = parser.parse_args()

    # Dry run unless --live was given (argparse rejects --live with --dry-run)
    dry_run = not args.live

    if not os.path.exists(args.xml_file):
        print(f"❌ Error: XML file not found: {args.xml_file}")
        sys.exit(1)

    # Show warning for permanent deletion
    if args.permanent_delete and not dry_run:
        print("⚠️ WARNING: Files will be PERMANENTLY DELETED!")
        try:
            response = input("Are you sure you want to continue? (yes/no): ").lower().strip()
        except EOFError:
            # No interactive input available: treat as "no" rather than crash
            response = ''
        if response not in ['yes', 'y']:
            print("Operation cancelled.")
            sys.exit(0)

    # Create cleaner and run
    cleaner = DuplicateGameCleaner(args.xml_file, dry_run=dry_run, permanent_delete=args.permanent_delete)
    try:
        cleaner.clean_duplicates()
    except KeyboardInterrupt:
        print(f"\n⚠️ Operation cancelled by user")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report and exit non-zero instead of a traceback
        print(f"❌ Unexpected error: {e}")
        sys.exit(1)
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
@mysiki
Copy link
Author

mysiki commented Nov 10, 2025

python3 duplicate_game_cleaner.py -h

Cleans up duplicated games, keeping one copy per game in this order of preference:

  • French
  • .pc (for dos)
  • .zip
  • m3u playlist where files are in subfolder

By default the script performs a dry run. With --live, files are moved to a .trash folder.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment