Last active
November 10, 2025 10:46
-
-
Save mysiki/e4449bc239ad1b4cbae62cd99fae7e36 to your computer and use it in GitHub Desktop.
retro gamelist.xml cleanup duplicated
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Duplicate Game Cleaner for GameList XML files | |
| This script identifies and removes duplicate games based on: | |
| 1. If different languages: Keep French (fr) game | |
| 2. If same language but different formats: Prefer .pc, then .zip, then .m3u, then any other format | |
| Duplicates are identified by the same game ID in the XML file. | |
| By default, files are moved to a .trash folder in the same directory. | |
| Use --permanent-delete flag to permanently delete files instead. | |
| """ | |
| import xml.etree.ElementTree as ET | |
| import os | |
| import sys | |
| import argparse | |
| import shutil | |
| from pathlib import Path | |
| from typing import Dict, List, Tuple, Optional | |
class GameEntry:
    """A single ``<game>`` node of a gamelist XML file plus values derived from it.

    Attributes:
        element: the XML element this entry was built from.
        xml_file_dir: directory used to resolve relative ``<path>`` values.
        id: value of the element's ``id`` attribute (``None`` when absent).
        path, name, lang: stripped text of the matching child tags ('' if missing).
        file_extension: lower-cased suffix of ``path`` ('' when there is no path).
        full_path: ``path`` resolved against ``xml_file_dir`` (``None`` without a path).
    """

    def __init__(self, game_element, xml_file_dir):
        self.element = game_element
        self.xml_file_dir = xml_file_dir

        # Values read directly from the XML node
        self.id = game_element.get('id')
        self.path = self._get_text('path')
        self.name = self._get_text('name')
        self.lang = self._get_text('lang', '')

        # Values derived from the path
        if self.path:
            self.file_extension = Path(self.path).suffix.lower()
        else:
            self.file_extension = ''
        self.full_path = self._get_full_path()

    def _get_text(self, tag: str, default: str = '') -> str:
        """Return the stripped text of child ``tag``, or ``default`` if missing/empty."""
        node = self.element.find(tag)
        if node is None or not node.text:
            return default
        return node.text.strip()

    def _get_full_path(self) -> Optional[str]:
        """Resolve ``self.path`` to a full path, or ``None`` when no path is set."""
        if not self.path:
            return None
        if self.path.startswith('./'):
            # Drop the leading "./" before anchoring at the XML directory
            relative_part = self.path[2:]
        elif os.path.isabs(self.path):
            return self.path
        else:
            relative_part = self.path
        return os.path.join(self.xml_file_dir, relative_part)

    def has_french(self) -> bool:
        """Tell whether the ``lang`` text contains 'fr' (case-insensitive)."""
        lowered_lang = self.lang.lower()
        return 'fr' in lowered_lang

    def is_zip(self) -> bool:
        """Tell whether the game file has a ``.zip`` extension."""
        return '.zip' == self.file_extension

    def is_pc(self) -> bool:
        """Tell whether the game file has a ``.pc`` extension (DOS games)."""
        return '.pc' == self.file_extension

    def is_m3u(self) -> bool:
        """Tell whether the game file has an ``.m3u`` extension."""
        return '.m3u' == self.file_extension

    def __str__(self):
        fields = (
            f"id={self.id}, name={self.name}, lang={self.lang}, "
            f"path={self.path}, ext={self.file_extension}"
        )
        return f"Game({fields})"
| class DuplicateGameCleaner: | |
| def __init__(self, xml_file: str, dry_run: bool = True, permanent_delete: bool = False): | |
| self.xml_file = xml_file | |
| self.xml_file_dir = os.path.dirname(os.path.abspath(xml_file)) | |
| self.dry_run = dry_run | |
| self.permanent_delete = permanent_delete | |
| self.games_by_id: Dict[str, List[GameEntry]] = {} | |
| self.tree = None | |
| self.root = None | |
| def parse_xml(self): | |
| """Parse the XML file and group games by ID""" | |
| try: | |
| self.tree = ET.parse(self.xml_file) | |
| self.root = self.tree.getroot() | |
| for game_elem in self.root.findall('game'): | |
| game = GameEntry(game_elem, self.xml_file_dir) | |
| if game.id: | |
| if game.id not in self.games_by_id: | |
| self.games_by_id[game.id] = [] | |
| self.games_by_id[game.id].append(game) | |
| except ET.ParseError as e: | |
| print(f"Error parsing XML file: {e}") | |
| sys.exit(1) | |
| except FileNotFoundError: | |
| print(f"XML file not found: {self.xml_file}") | |
| sys.exit(1) | |
| def find_duplicates(self) -> Dict[str, List[GameEntry]]: | |
| """Find games with duplicate IDs""" | |
| return {game_id: games for game_id, games in self.games_by_id.items() if len(games) > 1} | |
| def select_game_to_keep(self, games: List[GameEntry]) -> GameEntry: | |
| """ | |
| Select which game to keep based on the criteria: | |
| 1. If different languages: Keep French (fr) game | |
| 2. If same language but different formats: Prefer .pc over .zip, then .zip over others | |
| """ | |
| if len(games) == 1: | |
| return games[0] | |
| # Check if we have different languages | |
| languages = {game.lang for game in games} | |
| if len(languages) > 1: | |
| # Different languages - prefer French | |
| french_games = [game for game in games if game.has_french()] | |
| if french_games: | |
| # If multiple French games, apply format preference | |
| return self._select_by_format_preference(french_games) | |
| else: | |
| # No French games, apply format preference to all | |
| return self._select_by_format_preference(games) | |
| else: | |
| # Same language - apply format preference | |
| return self._select_by_format_preference(games) | |
| def _select_by_format_preference(self, games: List[GameEntry]) -> GameEntry: | |
| """ | |
| Select game based on format preference: .pc > .zip > .m3u (subfolder) > .m3u (same dir) > others | |
| """ | |
| # First preference: .pc files (for DOS games) | |
| pc_games = [game for game in games if game.is_pc()] | |
| if pc_games: | |
| return pc_games[0] | |
| # Second preference: .zip files | |
| zip_games = [game for game in games if game.is_zip()] | |
| if zip_games: | |
| return zip_games[0] | |
| # Third preference: .m3u files that use subfolders | |
| m3u_games = [game for game in games if game.is_m3u()] | |
| if m3u_games: | |
| # Among .m3u files, prefer those that use subfolders | |
| subfolder_m3u = [game for game in m3u_games if self.m3u_uses_subfolders(game)] | |
| if subfolder_m3u: | |
| return subfolder_m3u[0] | |
| # If no subfolder m3u, return first m3u | |
| return m3u_games[0] | |
| # Fallback: return first game | |
| return games[0] | |
| def create_trash_folder(self, file_path: str) -> str: | |
| """Create .trash folder in the same directory as the file""" | |
| file_dir = os.path.dirname(file_path) | |
| trash_dir = os.path.join(file_dir, '.trash') | |
| if not os.path.exists(trash_dir): | |
| os.makedirs(trash_dir, exist_ok=True) | |
| return trash_dir | |
| def get_unique_trash_path(self, file_path: str, trash_dir: str) -> str: | |
| """Get a unique path in trash folder to avoid conflicts""" | |
| filename = os.path.basename(file_path) | |
| trash_path = os.path.join(trash_dir, filename) | |
| # If file already exists in trash, add a number suffix | |
| counter = 1 | |
| base_name = os.path.splitext(filename)[0] | |
| extension = os.path.splitext(filename)[1] | |
| while os.path.exists(trash_path): | |
| new_filename = f"{base_name}_{counter}{extension}" | |
| trash_path = os.path.join(trash_dir, new_filename) | |
| counter += 1 | |
| return trash_path | |
| def read_m3u_files(self, m3u_path: str) -> List[str]: | |
| """Read .m3u file and return list of referenced disc files""" | |
| disc_files = [] | |
| try: | |
| with open(m3u_path, 'r', encoding='utf-8') as f: | |
| m3u_dir = os.path.dirname(m3u_path) | |
| for line in f: | |
| line = line.strip() | |
| # Skip empty lines and comments | |
| if not line or line.startswith('#'): | |
| continue | |
| # Handle relative paths | |
| if not os.path.isabs(line): | |
| disc_file_path = os.path.join(m3u_dir, line) | |
| else: | |
| disc_file_path = line | |
| # Normalize the path | |
| disc_file_path = os.path.normpath(disc_file_path) | |
| disc_files.append(disc_file_path) | |
| except (FileNotFoundError, UnicodeDecodeError, OSError) as e: | |
| print(f" โ ๏ธ Error reading .m3u file {m3u_path}: {e}") | |
| return disc_files | |
| def is_m3u_file(self, file_path: str) -> bool: | |
| """Check if the file is an .m3u file""" | |
| return file_path.lower().endswith('.m3u') | |
| def m3u_uses_subfolders(self, game: GameEntry) -> bool: | |
| """Check if an .m3u file references files in subfolders""" | |
| if not game.is_m3u() or not game.full_path: | |
| return False | |
| try: | |
| disc_files = self.read_m3u_files(game.full_path) | |
| if not disc_files: | |
| return False | |
| # Check if any of the referenced files are in subfolders | |
| m3u_dir = os.path.dirname(game.full_path) | |
| for disc_file in disc_files: | |
| # Get relative path from m3u directory | |
| try: | |
| rel_path = os.path.relpath(disc_file, m3u_dir) | |
| # If relative path contains directory separators, it's in a subfolder | |
| if os.path.dirname(rel_path): | |
| return True | |
| except ValueError: | |
| # Can't get relative path, skip this file | |
| continue | |
| return False | |
| except Exception: | |
| # If we can't read the m3u file, assume it doesn't use subfolders | |
| return False | |
| def delete_file_safely(self, file_path: str) -> bool: | |
| """Delete or move a file safely with error handling""" | |
| if not file_path or not os.path.exists(file_path): | |
| print(f" โ ๏ธ File not found: {file_path}") | |
| return False | |
| files_to_delete = [file_path] | |
| # If this is an .m3u file, also get the disc files it references | |
| if self.is_m3u_file(file_path): | |
| disc_files = self.read_m3u_files(file_path) | |
| if disc_files: | |
| print(f" ๐ .m3u file contains {len(disc_files)} disc files:") | |
| for disc_file in disc_files: | |
| print(f" - {disc_file}") | |
| if os.path.exists(disc_file): | |
| files_to_delete.append(disc_file) | |
| else: | |
| print(f" โ ๏ธ Disc file not found: {disc_file}") | |
| success = True | |
| for current_file in files_to_delete: | |
| try: | |
| if self.dry_run: | |
| action = "permanently delete" if self.permanent_delete else "move to trash" | |
| file_type = "(.m3u playlist)" if current_file == file_path and self.is_m3u_file(current_file) else "(.m3u disc file)" if current_file != file_path else "" | |
| print(f" ๐ [DRY RUN] Would {action}: {current_file} {file_type}") | |
| else: | |
| if self.permanent_delete: | |
| # Permanent deletion | |
| os.remove(current_file) | |
| file_type = "(.m3u playlist)" if current_file == file_path and self.is_m3u_file(current_file) else "(.m3u disc file)" if current_file != file_path else "" | |
| print(f" โ Permanently deleted: {current_file} {file_type}") | |
| else: | |
| # Move to trash | |
| trash_dir = self.create_trash_folder(current_file) | |
| trash_path = self.get_unique_trash_path(current_file, trash_dir) | |
| shutil.move(current_file, trash_path) | |
| file_type = "(.m3u playlist)" if current_file == file_path and self.is_m3u_file(current_file) else "(.m3u disc file)" if current_file != file_path else "" | |
| print(f" ๐๏ธ Moved to trash: {current_file} โ {trash_path} {file_type}") | |
| except (OSError, shutil.Error) as e: | |
| action = "deleting" if self.permanent_delete else "moving to trash" | |
| print(f" โ Error {action} {current_file}: {e}") | |
| success = False | |
| return success | |
| def remove_game_from_xml(self, game_to_remove: GameEntry): | |
| """Remove a game entry from the XML tree""" | |
| try: | |
| self.root.remove(game_to_remove.element) | |
| return True | |
| except ValueError: | |
| print(f" โ ๏ธ Game element not found in XML tree") | |
| return False | |
| def clean_duplicates(self): | |
| """Main method to clean duplicate games""" | |
| print(f"๐ฎ Duplicate Game Cleaner") | |
| print(f"๐ XML file: {self.xml_file}") | |
| if self.dry_run: | |
| print(f"๐งช Mode: DRY RUN") | |
| else: | |
| deletion_mode = "PERMANENT DELETION" if self.permanent_delete else "MOVE TO TRASH" | |
| print(f"๐งช Mode: LIVE - {deletion_mode}") | |
| print("=" * 60) | |
| # Parse XML | |
| self.parse_xml() | |
| total_games = sum(len(games) for games in self.games_by_id.values()) | |
| print(f"๐ Total games found: {total_games}") | |
| # Find duplicates | |
| duplicates = self.find_duplicates() | |
| print(f"๐ Duplicate game IDs found: {len(duplicates)}") | |
| if not duplicates: | |
| print("โจ No duplicates found! Your game library is clean.") | |
| return | |
| total_deleted = 0 | |
| games_to_remove = [] | |
| # Process each duplicate group | |
| for game_id, games in duplicates.items(): | |
| print(f"\n๐ฏ Processing duplicates for Game ID: {game_id}") | |
| print(f" Found {len(games)} copies:") | |
| for i, game in enumerate(games, 1): | |
| lang_display = game.lang if game.lang else "unknown" | |
| print(f" {i}. {game.name} ({lang_display}) - {game.file_extension} - {game.path}") | |
| # Select game to keep | |
| keep_game = self.select_game_to_keep(games) | |
| print(f" โ Keeping: {keep_game.name} ({keep_game.lang}) - {keep_game.file_extension} - {keep_game.path}") | |
| # Mark others for deletion | |
| for game in games: | |
| if game != keep_game: | |
| print(f" ๐๏ธ Marking for deletion: {game.name} ({game.lang}) - {game.file_extension} - {game.path}") | |
| # Delete physical file | |
| if self.delete_file_safely(game.full_path): | |
| games_to_remove.append(game) | |
| total_deleted += 1 | |
| # Remove games from XML (only in live mode) | |
| if games_to_remove and not self.dry_run: | |
| print(f"\n๐ Updating XML file...") | |
| removed_count = 0 | |
| for game in games_to_remove: | |
| if self.remove_game_from_xml(game): | |
| removed_count += 1 | |
| # Create backup | |
| backup_file = f"{self.xml_file}.backup" | |
| self.tree.write(backup_file, encoding='utf-8', xml_declaration=True) | |
| print(f"๐พ Backup created: {backup_file}") | |
| # Save updated XML | |
| self.tree.write(self.xml_file, encoding='utf-8', xml_declaration=True) | |
| print(f"โ Updated XML file: {self.xml_file}") | |
| elif games_to_remove and self.dry_run: | |
| print(f"\n๐ [DRY RUN] Would update XML file...") | |
| print(f"๐ [DRY RUN] Would remove {len(games_to_remove)} entries from XML") | |
| print("\n" + "=" * 60) | |
| print(f"๐ Summary:") | |
| print(f" โข Total games processed: {total_games}") | |
| print(f" โข Duplicate groups found: {len(duplicates)}") | |
| if self.dry_run: | |
| action = "permanently deleted" if self.permanent_delete else "moved to trash" | |
| print(f" โข Games would be {action}: {total_deleted}") | |
| print(f" โข XML entries would be removed: {len(games_to_remove)}") | |
| print(f"\n๐ To actually perform the operation, run with --live flag") | |
| if not self.permanent_delete: | |
| print(f" Files will be moved to .trash folders (use --permanent-delete for permanent deletion)") | |
| else: | |
| action = "permanently deleted" if self.permanent_delete else "moved to trash" | |
| print(f" โข Games {action}: {total_deleted}") | |
| print(f" โข XML entries removed: {len(games_to_remove)}") | |
def main():
    """Command-line entry point: parse arguments and run the duplicate cleaner.

    Runs as a dry run unless ``--live`` is given. ``--live`` and ``--dry-run``
    are mutually exclusive, so a contradictory command line is rejected by
    argparse instead of silently running live. Exits with status 1 when the
    XML file is missing, the run is interrupted, or an unexpected error occurs.
    """
    parser = argparse.ArgumentParser(description='Clean duplicate games from GameList XML')
    parser.add_argument('xml_file', help='Path to the GameList XML file')
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument('--live', action='store_true',
                      help='Actually process files (default is dry run)')
    mode.add_argument('--dry-run', action='store_true',
                      help='Only show what would be processed (default)')
    parser.add_argument('--permanent-delete', action='store_true',
                        help='Permanently delete files instead of moving to .trash folder')
    args = parser.parse_args()

    # Dry run is the default; only an explicit --live turns it off
    dry_run = not args.live

    if not os.path.exists(args.xml_file):
        print(f"❌ Error: XML file not found: {args.xml_file}")
        sys.exit(1)

    # Permanent deletion in live mode is irreversible, so ask first
    if args.permanent_delete and not dry_run:
        print("⚠️ WARNING: Files will be PERMANENTLY DELETED!")
        try:
            response = input("Are you sure you want to continue? (yes/no): ").lower().strip()
        except EOFError:
            # No interactive stdin: treat the missing answer as "no"
            response = ''
        if response not in ['yes', 'y']:
            print("Operation cancelled.")
            sys.exit(0)

    cleaner = DuplicateGameCleaner(args.xml_file, dry_run=dry_run, permanent_delete=args.permanent_delete)
    try:
        cleaner.clean_duplicates()
    except KeyboardInterrupt:
        print("\n⚠️ Operation cancelled by user")
        sys.exit(1)
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
python3 duplicate_game_cleaner.py -h
Cleans up duplicated games, keeping the preferred copy of each:
By default the script performs a dry run. With --live, duplicate files are moved to a .trash folder.