Skip to content

Instantly share code, notes, and snippets.

@nickboldt
Created December 18, 2025 14:21
Show Gist options
  • Select an option

  • Save nickboldt/c41b2e46f6c5b29e2a88e03674a1e664 to your computer and use it in GitHub Desktop.

Select an option

Save nickboldt/c41b2e46f6c5b29e2a88e03674a1e664 to your computer and use it in GitHub Desktop.
Compare a CSV dump of Ben's working sheet with the current state of the downstream catalog-entities folder.
#!/usr/bin/env python3
"""
Compare support and provider metadata between CSV file and YAML files.
Auto-updates YAML files when CSV says "community supported" (requires --edit flag).
"""
import argparse
import csv
import os
import yaml
from pathlib import Path
from typing import Dict, List, Tuple, Optional
def parse_csv(csv_path: str) -> Dict[str, Tuple[str, str]]:
    """Parse the working-sheet CSV into a ``name -> (support, provider)`` map.

    Column 1 (1-indexed) = plugin name
    Column 6 (1-indexed) = proposed-1.9-status (support)
    Column 10 (1-indexed) = author (provider)

    The first row is treated as a header and skipped. Rows with fewer than
    10 columns or an empty name are ignored. When a name appears more than
    once, the last row wins. An empty file yields an empty dict.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        Mapping of stripped name to a ``(support, provider)`` tuple of
        stripped strings.

    Raises:
        OSError: If the file cannot be opened.
    """
    result: Dict[str, Tuple[str, str]] = {}
    # newline='' is what the csv module expects so that quoted fields with
    # embedded line breaks are parsed correctly.
    with open(csv_path, 'r', newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        # Skip the header; the default avoids StopIteration on an empty file.
        next(reader, None)
        for row in reader:
            if len(row) < 10:
                continue
            name = row[0].strip()  # Column 1
            if name:
                # Columns 6 and 10 (0-indexed 5 and 9).
                result[name] = (row[5].strip(), row[9].strip())
    return result
def parse_yaml_file(yaml_path: str) -> Optional[Dict]:
    """Load the YAML document stored at *yaml_path*.

    Returns whatever ``yaml.safe_load`` produces for the file. If the file
    cannot be opened or parsed, the error is printed and ``None`` is
    returned instead of raising.
    """
    try:
        with open(yaml_path, 'r') as handle:
            return yaml.safe_load(handle)
    except Exception as err:
        print(f"Error parsing {yaml_path}: {err}")
    return None
def normalize_support_level(level: str) -> str:
    """Normalize a support-level string to a canonical form for comparison.

    Note: 'community' in YAML equals 'community supported' in CSV.

    The abbreviation "ga" is only recognised as a whole word (so
    "GA", "ga (by ACS)" match, but "legacy" or "investigating" do not).
    The longer phrases are matched as substrings so that decorated values
    such as "tech-preview (by X)" still normalize.

    Args:
        level: Raw support level from YAML or CSV. ``None`` and other falsy
            values are treated as an empty string.

    Returns:
        One of 'generally-available', 'tech-preview', 'dev-preview',
        'community-supported', or the lower-cased, stripped input when no
        known level is found.
    """
    level = str(level or '').lower().strip()

    # Split on anything that is not alphanumeric so "ga" must stand alone.
    words = ''.join(ch if ch.isalnum() else ' ' for ch in level).split()
    if 'ga' in words:
        return 'generally-available'

    # Common variations, matched as substrings in this priority order.
    phrase_mappings = {
        'generally-available': 'generally-available',
        'production': 'generally-available',
        'tech-preview': 'tech-preview',
        'dev-preview': 'dev-preview',
        'community supported': 'community-supported',
        'community-supported': 'community-supported',
        'community': 'community-supported',  # YAML "community" = CSV "community supported"
    }
    for phrase, canonical in phrase_mappings.items():
        if phrase in level:
            return canonical
    return level
def extract_plugin_metadata(yaml_data: Dict) -> Tuple[Optional[str], Optional[str], Optional[str]]:
    """Extract name, support level, and provider from a Plugin YAML document.

    ``spec.support`` may be either a plain string (the level) or a mapping
    with ``level`` and ``provider`` keys. When it is not a mapping, the
    provider falls back to ``spec.author``.

    Args:
        yaml_data: Parsed YAML document.

    Returns:
        ``(name, level, provider)``. All three are ``None`` when the
        document is not a mapping with ``kind: Plugin``. Otherwise ``level``
        and ``provider`` are ``''`` when absent or null, and ``name`` is
        ``None`` when ``metadata.name`` is missing.
    """
    if not isinstance(yaml_data, dict) or yaml_data.get('kind') != 'Plugin':
        return None, None, None

    # "metadata:" / "spec:" with no value parse as None, so a plain
    # .get(key, {}) default is not enough.
    metadata = yaml_data.get('metadata')
    if not isinstance(metadata, dict):
        metadata = {}
    spec = yaml_data.get('spec')
    if not isinstance(spec, dict):
        spec = {}

    name = metadata.get('name')

    # Support can be a string or an object
    support = spec.get('support')
    if isinstance(support, dict):
        level = support.get('level') or ''
        provider = support.get('provider') or ''
    else:
        level = support or ''
        provider = spec.get('author') or ''  # Fallback to author
    return name, level, provider
def extract_package_metadata(yaml_data: Dict) -> Tuple[Optional[str], ...]:
    """Extract name, support level, provider and parent plugin from a Package.

    ``spec.support`` may be either a plain string (the level) or a mapping
    with ``level`` and ``provider`` keys. When it is not a mapping, the
    provider falls back to ``spec.author``. ``spec.partOf`` maps the package
    to its plugin; the first entry of a list is used, and a bare string is
    accepted as-is.

    Args:
        yaml_data: Parsed YAML document.

    Returns:
        ``(name, level, provider, part_of_name)`` for a ``kind: Package``
        document. For anything else the historical 3-tuple
        ``(None, None, None)`` is returned, so callers must check the
        length before unpacking four values.
    """
    if not isinstance(yaml_data, dict) or yaml_data.get('kind') != 'Package':
        return None, None, None

    # "metadata:" / "spec:" with no value parse as None, so a plain
    # .get(key, {}) default is not enough.
    metadata = yaml_data.get('metadata')
    if not isinstance(metadata, dict):
        metadata = {}
    spec = yaml_data.get('spec')
    if not isinstance(spec, dict):
        spec = {}

    name = metadata.get('name')

    # Support can be a string or an object
    support = spec.get('support')
    if isinstance(support, dict):
        level = support.get('level') or ''
        provider = support.get('provider') or ''
    else:
        level = support or ''
        provider = spec.get('author') or ''  # Fallback to author

    # Get partOf to map package to plugin
    part_of = spec.get('partOf')
    if isinstance(part_of, str):
        # Indexing a scalar "partOf: name" would yield only its first letter.
        part_of_name = part_of or None
    elif isinstance(part_of, (list, tuple)) and part_of:
        part_of_name = part_of[0]
    else:
        part_of_name = None
    return name, level, provider, part_of_name
def update_yaml_support(yaml_path: str, new_support: str, new_provider: Optional[str] = None) -> bool:
    """Update the support level (and optionally provider) in a YAML file.

    If ``spec.support`` is already a mapping, its ``level`` (and, when
    *new_provider* is given, ``provider``) is updated in place. Otherwise
    ``spec.support`` is replaced by *new_support*, or by a
    ``{level, provider}`` mapping when *new_provider* is given.

    NOTE: the file is re-serialized with ``yaml.dump``, so comments and the
    original layout are NOT preserved; only key order is kept.

    Args:
        yaml_path: Path of the YAML file to rewrite.
        new_support: Support level to store.
        new_provider: Provider to store; left untouched when falsy.

    Returns:
        ``True`` when the file was rewritten. ``False`` when the document
        has no ``spec`` mapping or when any error occurred (the error is
        printed).
    """
    try:
        with open(yaml_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
        if not isinstance(data, dict) or not isinstance(data.get('spec'), dict):
            return False
        spec = data['spec']

        # Update support field
        support = spec.get('support')
        if isinstance(support, dict):
            # Support is an object with level and provider
            support['level'] = new_support
            if new_provider:
                support['provider'] = new_provider
        elif new_provider:
            # Support is a simple string - convert to object for the provider
            spec['support'] = {
                'level': new_support,
                'provider': new_provider,
            }
        else:
            spec['support'] = new_support

        # Serialize before opening for writing: opening with 'w' truncates
        # the file, so a failure during dump must not leave it empty.
        serialized = yaml.dump(
            data, default_flow_style=False, sort_keys=False, allow_unicode=True
        )
        with open(yaml_path, 'w', encoding='utf-8') as f:
            f.write(serialized)
        return True
    except Exception as e:
        print(f"Error updating {yaml_path}: {e}")
        return False
def _iter_yaml_files(directory, skip_names):
    """Return the sorted ``*.yaml`` files in *directory*, minus *skip_names*."""
    root = Path(directory)
    if not root.is_dir():
        # Without this hint a wrong path silently reports every CSV entry
        # as missing.
        print(f"Warning: directory not found: {directory}")
        return []
    return [path for path in sorted(root.glob('*.yaml'))
            if path.name not in skip_names]


def _reconcile(yaml_file, entity_type, name, part_of, level, provider,
               csv_entry, edit, state):
    """Compare one YAML entity with its CSV row; record and apply differences."""
    csv_support, csv_provider = csv_entry
    csv_norm = normalize_support_level(csv_support)

    # Check for contradictions
    support_mismatch = normalize_support_level(level or '') != csv_norm
    # bool() so reports store True/False rather than a provider string.
    provider_mismatch = bool(
        provider and csv_provider and provider != csv_provider
    )
    if support_mismatch or provider_mismatch:
        state['contradictions'][entity_type].append({
            'file': yaml_file.name,
            'name': name,
            'part_of': part_of,
            'type': entity_type,
            'yaml_support': level,
            'csv_support': csv_support,
            'yaml_provider': provider,
            'csv_provider': csv_provider,
            'support_mismatch': support_mismatch,
            'provider_mismatch': provider_mismatch,
        })

    # Auto-update if CSV says "community supported" -> YAML should be "community"
    if csv_norm != 'community-supported':
        return
    new_yaml_value = 'community'
    if level == new_yaml_value and not provider_mismatch:
        # Already in sync: rewriting would only reformat the file and drop
        # its comments.
        return

    update_info = {
        'file': yaml_file.name,
        'type': entity_type,
        'old_support': level,
        'new_support': new_yaml_value,
        'csv_value': csv_support,
    }
    change = f"{yaml_file.name} support: '{level}' → '{new_yaml_value}' (CSV: '{csv_support}')"
    if edit:
        print(f" Updating {change}")
        new_provider = csv_provider if provider_mismatch else None
        if update_yaml_support(str(yaml_file), new_yaml_value, new_provider):
            state['updates_made'].append(update_info)
    else:
        print(f" Would update {change}")
        state['updates_would_make'].append(update_info)


def _print_contradictions(entity_type, items):
    """Print the console section for one entity type's contradictions."""
    if not items:
        print(f"\nNo {entity_type} contradictions found.")
        return
    print(f"\n### {entity_type.upper()} CONTRADICTIONS ({len(items)}) ###\n")
    for item in items:
        print(f"File: {item['file']}")
        if entity_type == 'package':
            print(f" Package Name: {item['name']}")
            if item['part_of']:
                print(f" Part Of Plugin: {item['part_of']}")
        else:
            print(f" Name: {item['name']}")
        if item['support_mismatch']:
            print(" Support MISMATCH:")
            print(f" YAML: {item['yaml_support']}")
            print(f" CSV: {item['csv_support']}")
        if item['provider_mismatch']:
            print(" Provider MISMATCH:")
            print(f" YAML: {item['yaml_provider']}")
            print(f" CSV: {item['csv_provider']}")
        print()


def _write_text_report(path, contradictions, missing_entries, updates_made):
    """Write the human-readable report of contradictions, gaps and updates."""
    total = sum(len(items) for items in contradictions.values())
    # The report contains a non-ASCII arrow, so the encoding must be explicit.
    with open(path, 'w', encoding='utf-8') as f:
        f.write("PLUGIN AND PACKAGE METADATA CONTRADICTIONS\n")
        f.write("="*80 + "\n\n")
        # The package section is preceded by one extra blank line.
        for entity_type, lead in (('plugin', ''), ('package', '\n')):
            items = contradictions[entity_type]
            if not items:
                continue
            f.write(f"{lead}{entity_type.upper()} CONTRADICTIONS ({len(items)})\n")
            f.write("-"*80 + "\n\n")
            for item in items:
                f.write(f"File: {item['file']}\n")
                if entity_type == 'package':
                    f.write(f" Package Name: {item['name']}\n")
                    if item['part_of']:
                        f.write(f" Part Of Plugin: {item['part_of']}\n")
                else:
                    f.write(f" Name: {item['name']}\n")
                if item['support_mismatch']:
                    f.write(f" Support MISMATCH: YAML='{item['yaml_support']}' vs CSV='{item['csv_support']}'\n")
                if item['provider_mismatch']:
                    f.write(f" Provider MISMATCH: YAML='{item['yaml_provider']}' vs CSV='{item['csv_provider']}'\n")
                f.write("\n")
        f.write(f"\nTOTAL: {total} contradictions\n")

        # Add missing entries section
        if missing_entries:
            f.write(f"\n\nMISSING PLUGINS/PACKAGES ({len(missing_entries)})\n")
            f.write("-"*80 + "\n")
            f.write("Listed in CSV but not found in YAML files:\n\n")
            for entry in missing_entries:
                f.write(f"Name: {entry['name']}\n")
                f.write(f" CSV Support: {entry['csv_support']}\n")
                f.write(f" CSV Provider: {entry['csv_provider']}\n\n")

        # Add updates section
        if updates_made:
            f.write(f"\n\nUPDATES APPLIED ({len(updates_made)})\n")
            f.write("-"*80 + "\n\n")
            for update in updates_made:
                f.write(f"File: {update['file']} ({update['type']})\n")
                f.write(f" Changed: '{update['old_support']}' → '{update['new_support']}'\n\n")


def _write_contradictions_csv(path, contradictions):
    """Write all contradictions (plugins first, then packages) as CSV."""
    fieldnames = ['Type', 'File', 'Name', 'Part Of Plugin', 'Support Mismatch',
                  'YAML Support', 'CSV Support', 'Provider Mismatch',
                  'YAML Provider', 'CSV Provider']
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for entity_type in ('plugin', 'package'):
            for item in contradictions[entity_type]:
                writer.writerow({
                    'Type': entity_type.capitalize(),
                    'File': item['file'],
                    'Name': item['name'],
                    'Part Of Plugin': item.get('part_of') or '',
                    'Support Mismatch': 'YES' if item['support_mismatch'] else 'NO',
                    'YAML Support': item['yaml_support'],
                    'CSV Support': item['csv_support'],
                    'Provider Mismatch': 'YES' if item['provider_mismatch'] else 'NO',
                    'YAML Provider': item['yaml_provider'],
                    'CSV Provider': item['csv_provider'],
                })


def _write_missing_csv(path, missing_entries):
    """Write the CSV entries that matched no YAML file."""
    fieldnames = ['Name', 'CSV Support', 'CSV Provider', 'Status']
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for entry in missing_entries:
            writer.writerow({
                'Name': entry['name'],
                'CSV Support': entry['csv_support'],
                'CSV Provider': entry['csv_provider'],
                'Status': 'NOT FOUND in YAML files',
            })


def main():
    """Compare CSV metadata with plugin/package YAML files and report.

    Reads the working-sheet CSV, walks the plugin and package YAML
    directories, and reports support/provider contradictions and CSV
    entries with no matching YAML file. Where the CSV says "community
    supported", the YAML support level is set to ``community``; files are
    only rewritten when ``--edit`` is given, otherwise the change is just
    reported. Results are printed and also written to
    ``~/tmp/metadata_contradictions.txt``, ``~/tmp/metadata_contradictions.csv``
    and (when entries are missing) ``~/tmp/metadata_missing.csv``.

    The input locations default to the original hard-coded paths and can be
    overridden with ``--csv``, ``--packages-dir`` and ``--plugins-dir``.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(
        description='Compare support and provider metadata between CSV file and YAML files.'
    )
    parser.add_argument(
        '--edit',
        action='store_true',
        help='Actually update YAML files (without this flag, only reports what would be changed)'
    )
    parser.add_argument(
        '--csv',
        dest='csv_path',
        default=os.path.expanduser('~/tmp/RHDH-1.9-marketplace-plugin-metadata--working-sheet.csv'),
        help='CSV export of the working sheet (default: %(default)s)'
    )
    parser.add_argument(
        '--packages-dir',
        default='/home/nboldt/RHDH/DH/4/4-rhdh-plugin-catalog/catalog-index/catalog-entities/marketplace/packages',
        help='Directory containing package YAML files (default: %(default)s)'
    )
    parser.add_argument(
        '--plugins-dir',
        default='/home/nboldt/RHDH/DH/4/4-rhdh-plugin-catalog/catalog-index/catalog-entities/marketplace/plugins',
        help='Directory containing plugin YAML files (default: %(default)s)'
    )
    args = parser.parse_args()

    # Show mode
    if args.edit:
        print("MODE: EDIT - Will update YAML files where CSV says 'community supported'")
    else:
        print("MODE: DRY RUN - Only reporting (use --edit to actually update files)")

    # Parse CSV
    print("\nParsing CSV file...")
    csv_data = parse_csv(args.csv_path)
    print(f"Found {len(csv_data)} entries in CSV")

    # Track contradictions, updates and found CSV entries
    state = {
        'contradictions': {'plugin': [], 'package': []},
        'updates_made': [],
        'updates_would_make': [],
    }
    found_csv_entries = set()

    # Process plugins
    print("\nProcessing plugin files...")
    for yaml_file in _iter_yaml_files(args.plugins_dir, ('all.yaml', '1-boilerplate.yaml.sample')):
        yaml_data = parse_yaml_file(str(yaml_file))
        if not yaml_data:
            continue
        name, level, provider = extract_plugin_metadata(yaml_data)
        if not name or name not in csv_data:
            continue
        found_csv_entries.add(name)
        _reconcile(yaml_file, 'plugin', name, None, level, provider,
                   csv_data[name], args.edit, state)

    # Process packages
    print("Processing package files...")
    for yaml_file in _iter_yaml_files(args.packages_dir, ('all.yaml',)):
        yaml_data = parse_yaml_file(str(yaml_file))
        if not yaml_data:
            continue
        # The extractor returns four values for a Package and three
        # (all None) for any other kind.
        result = extract_package_metadata(yaml_data)
        name, level, provider = result[:3]
        part_of = result[3] if len(result) == 4 else None
        if not name:
            continue
        # Check against CSV using the plugin name (part_of) if available
        lookup_name = part_of if part_of else name
        if lookup_name not in csv_data:
            continue
        found_csv_entries.add(lookup_name)
        _reconcile(yaml_file, 'package', name, part_of, level, provider,
                   csv_data[lookup_name], args.edit, state)

    contradictions = state['contradictions']
    plugin_contradictions = contradictions['plugin']
    package_contradictions = contradictions['package']
    updates_made = state['updates_made']
    updates_would_make = state['updates_would_make']

    # Print results
    print("\n" + "="*80)
    print("CONTRADICTIONS FOUND")
    print("="*80)
    _print_contradictions('plugin', plugin_contradictions)
    _print_contradictions('package', package_contradictions)

    # Find missing entries (in CSV but not in YAML files)
    missing_entries = [
        {'name': csv_name, 'csv_support': csv_support, 'csv_provider': csv_provider}
        for csv_name, (csv_support, csv_provider) in csv_data.items()
        if csv_name and csv_name not in found_csv_entries
    ]

    # Summary
    total = len(plugin_contradictions) + len(package_contradictions)
    print("\n" + "="*80)
    print(f"SUMMARY: {total} total contradictions found")
    print(f" - {len(plugin_contradictions)} plugin contradictions")
    print(f" - {len(package_contradictions)} package contradictions")
    print("="*80)

    # Missing entries summary
    if missing_entries:
        print(f"\n### MISSING PLUGINS/PACKAGES ({len(missing_entries)}) ###")
        print("Listed in CSV but not found in YAML files:\n")
        for entry in missing_entries:
            print(f" - {entry['name']}")
            print(f" CSV Support: {entry['csv_support']}, CSV Provider: {entry['csv_provider']}")
        print(f"\nTotal missing: {len(missing_entries)}")

    # Updates summary
    if updates_made:
        print(f"\n### UPDATES APPLIED ({len(updates_made)}) ###\n")
        for update in updates_made:
            print(f"✓ {update['file']} ({update['type']})")
            print(f" Changed: '{update['old_support']}' → '{update['new_support']}'")
        print(f"\nTotal files updated: {len(updates_made)}")
    else:
        print("\nNo automatic updates were applied.")
    if updates_would_make:
        print(f"{len(updates_would_make)} file(s) would be updated (re-run with --edit to apply).")

    # Write to output files (text and CSV)
    output_dir = os.path.expanduser('~/tmp')
    os.makedirs(output_dir, exist_ok=True)
    output_file = os.path.join(output_dir, 'metadata_contradictions.txt')
    csv_output_file = os.path.join(output_dir, 'metadata_contradictions.csv')
    missing_csv_file = os.path.join(output_dir, 'metadata_missing.csv')

    _write_text_report(output_file, contradictions, missing_entries, updates_made)
    _write_contradictions_csv(csv_output_file, contradictions)
    if missing_entries:
        _write_missing_csv(missing_csv_file, missing_entries)

    print("\nResults written to:")
    print(f" Text: {output_file}")
    print(f" CSV: {csv_output_file}")
    if missing_entries:
        print(f" Missing CSV: {missing_csv_file}")
# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment