Created
December 18, 2025 14:21
-
-
Save nickboldt/c41b2e46f6c5b29e2a88e03674a1e664 to your computer and use it in GitHub Desktop.
Compare a CSV dump of Ben's working sheet with the current state of the downstream catalog-entities folder.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Compare support and provider metadata between CSV file and YAML files. | |
| Auto-updates YAML files when CSV says "community supported" (requires --edit flag). | |
| """ | |
| import argparse | |
| import csv | |
| import os | |
| import yaml | |
| from pathlib import Path | |
| from typing import Dict, List, Tuple, Optional | |
def parse_csv(csv_path: str) -> Dict[str, Tuple[str, str]]:
    """
    Parse the CSV and build a ``name -> (support, provider)`` mapping.

    Column 1 (1-indexed) = name
    Column 6 (1-indexed) = proposed-1.9-status (support)
    Column 10 (1-indexed) = author (provider)

    The first row is treated as a header and skipped. Rows with fewer than
    10 columns, or with a blank name, are ignored. When a name appears more
    than once the last row wins. An empty file yields an empty mapping.

    Raises:
        OSError: if ``csv_path`` cannot be opened.
    """
    result: Dict[str, Tuple[str, str]] = {}
    # newline='' lets the csv module do its own newline handling, so quoted
    # cells containing line breaks are parsed as a single field.
    with open(csv_path, 'r', newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        # Skip the header; a default avoids StopIteration on an empty file.
        if next(reader, None) is None:
            return result
        for row in reader:
            if len(row) < 10:
                continue
            name = row[0].strip()
            if name:
                result[name] = (row[5].strip(), row[9].strip())
    return result
def parse_yaml_file(yaml_path: str) -> Optional[Dict]:
    """Load a YAML file and return its top-level mapping.

    Returns None, after printing a message, when the file cannot be read or
    decoded, when it is not valid YAML, or when the document is not a
    mapping. Returns None silently for an empty document.
    """
    try:
        with open(yaml_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
    except (OSError, UnicodeError) as e:
        print(f"Error parsing {yaml_path}: {e}")
        return None
    except yaml.YAMLError as e:
        print(f"Error parsing {yaml_path}: {e}")
        return None

    if data is None:
        return None
    if not isinstance(data, dict):
        # Callers use .get() on the result, so a list or scalar document
        # would crash them; report it here instead.
        print(f"Error parsing {yaml_path}: top-level YAML value is not a mapping")
        return None
    return data
def normalize_support_level(level: str) -> str:
    """Normalize support level strings for comparison.

    Note: 'community' in YAML equals 'community supported' in CSV.

    The input is lower-cased and stripped. An exact match against a known
    spelling wins. Otherwise the text is searched for a known spelling as a
    whole word, longest spelling first, so decorated values such as
    "ga (by ACS)" still normalize. Unrecognized values are returned
    lower-cased and stripped. None is treated as the empty string.
    """
    import re  # local import: used only by this function

    level = str(level or '').lower().strip()
    # Common variations
    mappings = {
        'ga': 'generally-available',
        'generally-available': 'generally-available',
        'production': 'generally-available',
        'tech-preview': 'tech-preview',
        'dev-preview': 'dev-preview',
        'community supported': 'community-supported',
        'community-supported': 'community-supported',
        'community': 'community-supported',  # YAML "community" = CSV "community supported"
    }
    if level in mappings:
        return mappings[level]

    # Whole-word search; a plain substring test would let the short key 'ga'
    # match unrelated words such as "legacy". Longest key first so the most
    # specific spelling is preferred.
    for key in sorted(mappings, key=len, reverse=True):
        pattern = r'(?<![a-z0-9])' + re.escape(key) + r'(?![a-z0-9])'
        if re.search(pattern, level):
            return mappings[key]
    return level
def extract_plugin_metadata(yaml_data: Dict) -> Tuple[Optional[str], Optional[str], Optional[str]]:
    """Extract name, support level, and provider from plugin YAML.

    Returns ``(None, None, None)`` when ``yaml_data`` is empty or its
    ``kind`` is not ``'Plugin'``. Otherwise returns ``(name, level,
    provider)``, where a missing level or provider is the empty string.
    """
    if not yaml_data or yaml_data.get('kind') != 'Plugin':
        return None, None, None

    # "or {}" also covers keys that are present but null in the YAML, which
    # .get(key, {}) would hand back as None.
    metadata = yaml_data.get('metadata') or {}
    spec = yaml_data.get('spec') or {}
    name = metadata.get('name')

    # Support can be a string or an object
    support = spec.get('support')
    if isinstance(support, dict):
        level = support.get('level') or ''
        provider = support.get('provider') or ''
    else:
        level = support or ''
        provider = spec.get('author') or ''  # Fallback to author
    return name, level, provider
def extract_package_metadata(
    yaml_data: Dict,
) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """Extract name, support level, provider, and owning plugin from package YAML.

    Always returns a 4-tuple ``(name, level, provider, part_of_name)``.
    All four are None when ``yaml_data`` is empty or its ``kind`` is not
    ``'Package'``. For a package, a missing level or provider is the empty
    string, and ``part_of_name`` is the first ``spec.partOf`` entry, or None
    when there is none.
    """
    if not yaml_data or yaml_data.get('kind') != 'Package':
        return None, None, None, None

    # "or {}" also covers keys that are present but null in the YAML.
    metadata = yaml_data.get('metadata') or {}
    spec = yaml_data.get('spec') or {}
    name = metadata.get('name')

    # Support can be a string or an object
    support = spec.get('support')
    if isinstance(support, dict):
        level = support.get('level') or ''
        provider = support.get('provider') or ''
    else:
        level = support or ''
        provider = spec.get('author') or ''  # Fallback to author

    # Get partOf to map package to plugin. A scalar "partOf: name" must not
    # be indexed, or only its first character would be returned.
    part_of = spec.get('partOf')
    if isinstance(part_of, str):
        part_of_name = part_of or None
    elif isinstance(part_of, (list, tuple)) and part_of:
        part_of_name = part_of[0]
    else:
        part_of_name = None
    return name, level, provider, part_of_name
def update_yaml_support(yaml_path: str, new_support: str, new_provider: Optional[str] = None) -> bool:
    """Update support level (and optionally provider) in a YAML file.

    If ``spec.support`` is already a mapping, its ``level`` (and
    ``provider``, when given) are updated in place. Otherwise it is replaced
    by the plain ``new_support`` string, or by a ``{level, provider}``
    mapping when a provider is given.

    NOTE: the file is re-serialized with ``yaml.dump``. ``sort_keys=False``
    keeps the key order as loaded, but the dump is produced from the parsed
    data only, so comments and the original layout are not carried over.

    Returns:
        True if the file was rewritten. False if the document has no
        mapping under ``spec`` or if reading, serializing or writing failed
        (a message is printed in the failure case).
    """
    try:
        with open(yaml_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        if not isinstance(data, dict) or not isinstance(data.get('spec'), dict):
            return False
        spec = data['spec']

        # Update support field
        support = spec.get('support')
        if isinstance(support, dict):
            # Support is an object with level and provider
            support['level'] = new_support
            if new_provider:
                support['provider'] = new_provider
        elif new_provider:
            # Support is a simple string - convert to object since a provider is needed
            spec['support'] = {
                'level': new_support,
                'provider': new_provider,
            }
        else:
            spec['support'] = new_support

        # Serialize before opening for write: open(..., 'w') truncates the
        # file, so a dump failure after that point would destroy its content.
        rendered = yaml.dump(data, default_flow_style=False, sort_keys=False, allow_unicode=True)
        with open(yaml_path, 'w', encoding='utf-8') as f:
            f.write(rendered)
        return True
    except (OSError, UnicodeError) as e:
        print(f"Error updating {yaml_path}: {e}")
        return False
    except yaml.YAMLError as e:
        print(f"Error updating {yaml_path}: {e}")
        return False
def _process_entity_files(entity_dir: str, entity_type: str, skip_files, csv_data: Dict[str, Tuple[str, str]], edit: bool):
    """Compare each ``*.yaml`` entity in ``entity_dir`` with its CSV row.

    ``entity_type`` is ``'plugin'`` or ``'package'``. Packages are looked up
    in the CSV by the plugin they are part of, when they declare one.

    Returns ``(contradictions, found_names, updates_made, updates_would_make)``.
    When ``edit`` is true, files whose CSV row says "community supported"
    are rewritten; otherwise the pending change is only recorded.
    """
    contradictions: List[dict] = []
    found_names = set()
    updates_made: List[dict] = []
    updates_would_make: List[dict] = []

    for yaml_file in sorted(Path(entity_dir).glob('*.yaml')):
        if yaml_file.name in skip_files:
            continue
        yaml_data = parse_yaml_file(str(yaml_file))
        if not yaml_data:
            continue

        if entity_type == 'package':
            # Accept both a 3-tuple and a 4-tuple from the extractor.
            name, level, provider, *rest = extract_package_metadata(yaml_data)
            part_of = rest[0] if rest else None
        else:
            name, level, provider = extract_plugin_metadata(yaml_data)
            part_of = None
        if not name:
            continue

        # Check against CSV using the plugin name (part_of) if available
        lookup_name = part_of if part_of else name
        if lookup_name not in csv_data:
            continue
        found_names.add(lookup_name)
        csv_support, csv_provider = csv_data[lookup_name]

        # Normalize for comparison
        csv_support_norm = normalize_support_level(csv_support)
        support_mismatch = normalize_support_level(level) != csv_support_norm
        # bool() so the flag is never an empty string or None
        provider_mismatch = bool(provider and csv_provider and provider != csv_provider)

        if support_mismatch or provider_mismatch:
            contradictions.append({
                'file': yaml_file.name,
                'name': name,
                'part_of': part_of,
                'type': entity_type,
                'yaml_support': level,
                'csv_support': csv_support,
                'yaml_provider': provider,
                'csv_provider': csv_provider,
                'support_mismatch': support_mismatch,
                'provider_mismatch': provider_mismatch,
            })

        # Auto-update if CSV says "community supported" -> YAML should be "community"
        if csv_support_norm != 'community-supported':
            continue
        new_yaml_value = 'community'
        new_provider = csv_provider if provider_mismatch else None
        if level == new_yaml_value and new_provider is None:
            # Already correct; rewriting would only re-serialize the file.
            continue

        update_info = {
            'file': yaml_file.name,
            'type': entity_type,
            'old_support': level,
            'new_support': new_yaml_value,
            'csv_value': csv_support,
        }
        if edit:
            print(f" Updating {yaml_file.name} support: '{level}' → '{new_yaml_value}' (CSV: '{csv_support}')")
            if update_yaml_support(str(yaml_file), new_yaml_value, new_provider):
                updates_made.append(update_info)
        else:
            print(f" Would update {yaml_file.name} support: '{level}' → '{new_yaml_value}' (CSV: '{csv_support}')")
            updates_would_make.append(update_info)

    return contradictions, found_names, updates_made, updates_would_make


def _print_contradictions(items: List[dict], entity_type: str) -> None:
    """Print the console section for one entity type's contradictions."""
    if not items:
        print(f"\nNo {entity_type} contradictions found.")
        return
    name_label = 'Package Name' if entity_type == 'package' else 'Name'
    print(f"\n### {entity_type.upper()} CONTRADICTIONS ({len(items)}) ###\n")
    for item in items:
        print(f"File: {item['file']}")
        print(f" {name_label}: {item['name']}")
        if item.get('part_of'):
            print(f" Part Of Plugin: {item['part_of']}")
        if item['support_mismatch']:
            print(" Support MISMATCH:")
            print(f" YAML: {item['yaml_support']}")
            print(f" CSV: {item['csv_support']}")
        if item['provider_mismatch']:
            print(" Provider MISMATCH:")
            print(f" YAML: {item['yaml_provider']}")
            print(f" CSV: {item['csv_provider']}")
        print()


def _write_text_report(path: str, plugin_items: List[dict], package_items: List[dict],
                       missing_entries: List[dict], updates_made: List[dict]) -> None:
    """Write the plain-text report of contradictions, missing entries and updates."""
    total = len(plugin_items) + len(package_items)
    sections = (
        ("PLUGIN CONTRADICTIONS", "Name", plugin_items),
        ("\nPACKAGE CONTRADICTIONS", "Package Name", package_items),
    )
    # The report contains '→', which not every locale encoding can represent,
    # so the encoding is fixed to UTF-8.
    with open(path, 'w', encoding='utf-8') as f:
        f.write("PLUGIN AND PACKAGE METADATA CONTRADICTIONS\n")
        f.write("=" * 80 + "\n\n")
        for heading, name_label, items in sections:
            if not items:
                continue
            f.write(f"{heading} ({len(items)})\n")
            f.write("-" * 80 + "\n\n")
            for item in items:
                f.write(f"File: {item['file']}\n")
                f.write(f" {name_label}: {item['name']}\n")
                if item.get('part_of'):
                    f.write(f" Part Of Plugin: {item['part_of']}\n")
                if item['support_mismatch']:
                    f.write(f" Support MISMATCH: YAML='{item['yaml_support']}' vs CSV='{item['csv_support']}'\n")
                if item['provider_mismatch']:
                    f.write(f" Provider MISMATCH: YAML='{item['yaml_provider']}' vs CSV='{item['csv_provider']}'\n")
                f.write("\n")
        f.write(f"\nTOTAL: {total} contradictions\n")

        # Add missing entries section
        if missing_entries:
            f.write(f"\n\nMISSING PLUGINS/PACKAGES ({len(missing_entries)})\n")
            f.write("-" * 80 + "\n")
            f.write("Listed in CSV but not found in YAML files:\n\n")
            for entry in missing_entries:
                f.write(f"Name: {entry['name']}\n")
                f.write(f" CSV Support: {entry['csv_support']}\n")
                f.write(f" CSV Provider: {entry['csv_provider']}\n\n")

        # Add updates section
        if updates_made:
            f.write(f"\n\nUPDATES APPLIED ({len(updates_made)})\n")
            f.write("-" * 80 + "\n\n")
            for update in updates_made:
                f.write(f"File: {update['file']} ({update['type']})\n")
                f.write(f" Changed: '{update['old_support']}' → '{update['new_support']}'\n\n")


def _write_contradictions_csv(path: str, items: List[dict]) -> None:
    """Write all contradictions (plugins first, then packages) as CSV rows."""
    fieldnames = ['Type', 'File', 'Name', 'Part Of Plugin', 'Support Mismatch',
                  'YAML Support', 'CSV Support', 'Provider Mismatch',
                  'YAML Provider', 'CSV Provider']
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for item in items:
            writer.writerow({
                'Type': item['type'].capitalize(),
                'File': item['file'],
                'Name': item['name'],
                'Part Of Plugin': item.get('part_of') or '',
                'Support Mismatch': 'YES' if item['support_mismatch'] else 'NO',
                'YAML Support': item['yaml_support'],
                'CSV Support': item['csv_support'],
                'Provider Mismatch': 'YES' if item['provider_mismatch'] else 'NO',
                'YAML Provider': item['yaml_provider'],
                'CSV Provider': item['csv_provider'],
            })


def _write_missing_csv(path: str, missing_entries: List[dict]) -> None:
    """Write the CSV rows that had no matching YAML entity."""
    fieldnames = ['Name', 'CSV Support', 'CSV Provider', 'Status']
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for entry in missing_entries:
            writer.writerow({
                'Name': entry['name'],
                'CSV Support': entry['csv_support'],
                'CSV Provider': entry['csv_provider'],
                'Status': 'NOT FOUND in YAML files',
            })


def main():
    """Compare CSV metadata with the catalog YAML files and report the differences.

    Prints contradictions, CSV rows without a YAML entity, and applied or
    pending updates, then writes text and CSV reports under ``~/tmp``. YAML
    files are modified only when ``--edit`` is given. Input locations can be
    overridden on the command line; the defaults are the original paths.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Compare support and provider metadata between CSV file and YAML files.'
    )
    parser.add_argument(
        '--edit',
        action='store_true',
        help='Actually update YAML files (without this flag, only reports what would be changed)'
    )
    parser.add_argument(
        '--csv',
        dest='csv_path',
        default='~/tmp/RHDH-1.9-marketplace-plugin-metadata--working-sheet.csv',
        help='CSV export of the working sheet (default: %(default)s)'
    )
    parser.add_argument(
        '--packages-dir',
        default='/home/nboldt/RHDH/DH/4/4-rhdh-plugin-catalog/catalog-index/catalog-entities/marketplace/packages',
        help='Directory containing package YAML files (default: %(default)s)'
    )
    parser.add_argument(
        '--plugins-dir',
        default='/home/nboldt/RHDH/DH/4/4-rhdh-plugin-catalog/catalog-index/catalog-entities/marketplace/plugins',
        help='Directory containing plugin YAML files (default: %(default)s)'
    )
    args = parser.parse_args()

    csv_path = os.path.expanduser(args.csv_path)
    packages_dir = os.path.expanduser(args.packages_dir)
    plugins_dir = os.path.expanduser(args.plugins_dir)

    # Show mode
    if args.edit:
        print("MODE: EDIT - Will update YAML files where CSV says 'community supported'")
    else:
        print("MODE: DRY RUN - Only reporting (use --edit to actually update files)")

    # Parse CSV
    print("\nParsing CSV file...")
    csv_data = parse_csv(csv_path)
    print(f"Found {len(csv_data)} entries in CSV")

    # Process plugins
    print("\nProcessing plugin files...")
    plugin_contradictions, plugin_found, plugin_done, plugin_pending = _process_entity_files(
        plugins_dir, 'plugin', ['all.yaml', '1-boilerplate.yaml.sample'], csv_data, args.edit)

    # Process packages
    print("Processing package files...")
    package_contradictions, package_found, package_done, package_pending = _process_entity_files(
        packages_dir, 'package', ['all.yaml'], csv_data, args.edit)

    found_csv_entries = plugin_found | package_found
    updates_made = plugin_done + package_done
    updates_would_make = plugin_pending + package_pending

    # Print results
    print("\n" + "=" * 80)
    print("CONTRADICTIONS FOUND")
    print("=" * 80)
    _print_contradictions(plugin_contradictions, 'plugin')
    _print_contradictions(package_contradictions, 'package')

    # Find missing entries (in CSV but not in YAML files)
    missing_entries = [
        {'name': csv_name, 'csv_support': csv_support, 'csv_provider': csv_provider}
        for csv_name, (csv_support, csv_provider) in csv_data.items()
        if csv_name and csv_name not in found_csv_entries
    ]

    # Summary
    total = len(plugin_contradictions) + len(package_contradictions)
    print("\n" + "=" * 80)
    print(f"SUMMARY: {total} total contradictions found")
    print(f" - {len(plugin_contradictions)} plugin contradictions")
    print(f" - {len(package_contradictions)} package contradictions")
    print("=" * 80)

    # Missing entries summary
    if missing_entries:
        print(f"\n### MISSING PLUGINS/PACKAGES ({len(missing_entries)}) ###")
        print("Listed in CSV but not found in YAML files:\n")
        for entry in missing_entries:
            print(f" - {entry['name']}")
            print(f" CSV Support: {entry['csv_support']}, CSV Provider: {entry['csv_provider']}")
        print(f"\nTotal missing: {len(missing_entries)}")

    # Updates summary
    if updates_made:
        print(f"\n### UPDATES APPLIED ({len(updates_made)}) ###\n")
        for update in updates_made:
            print(f"✓ {update['file']} ({update['type']})")
            print(f" Changed: '{update['old_support']}' → '{update['new_support']}'")
        print(f"\nTotal files updated: {len(updates_made)}")
    else:
        print("\nNo automatic updates were applied.")
        if updates_would_make:
            # Dry-run results were previously collected but never reported.
            print(f"{len(updates_would_make)} file(s) would be updated with --edit.")

    # Write to output files (text and CSV)
    output_file = os.path.expanduser('~/tmp/metadata_contradictions.txt')
    csv_output_file = os.path.expanduser('~/tmp/metadata_contradictions.csv')
    missing_csv_file = os.path.expanduser('~/tmp/metadata_missing.csv')
    # All three reports share one directory; make sure it exists.
    os.makedirs(os.path.dirname(output_file), exist_ok=True)

    _write_text_report(output_file, plugin_contradictions, package_contradictions,
                       missing_entries, updates_made)
    _write_contradictions_csv(csv_output_file, plugin_contradictions + package_contradictions)
    if missing_entries:
        _write_missing_csv(missing_csv_file, missing_entries)

    print("\nResults written to:")
    print(f" Text: {output_file}")
    print(f" CSV: {csv_output_file}")
    if missing_entries:
        print(f" Missing CSV: {missing_csv_file}")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment