Created
December 22, 2025 10:20
-
-
Save habil/297f5f70c9b8ee39650ce165dd0c3e46 to your computer and use it in GitHub Desktop.
Fix Turkish character encoding issues in all CSV files.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """Fix Turkish character encoding issues in all CSV files.""" | |
| import csv | |
| import os | |
| from pathlib import Path | |
# Latin-1 (ISO-8859-1) and Turkish Latin-5 (ISO-8859-9 / cp1254) differ in
# exactly six code points.  Turkish text decoded as Latin-1 therefore shows
# these six Icelandic letters where the Turkish ones belong.  The characters
# are written as escapes so the table survives editors and re-encodings.
_LATIN1_TO_TURKISH = str.maketrans({
    '\u00d0': '\u011e',  # Ð -> Ğ
    '\u00dd': '\u0130',  # Ý -> İ (capital dotted I, not plain ASCII 'I')
    '\u00de': '\u015e',  # Þ -> Ş
    '\u00f0': '\u011f',  # ð -> ğ
    '\u00fd': '\u0131',  # ý -> ı
    '\u00fe': '\u015f',  # þ -> ş
})


def fix_turkish_encoding(text):
    """Repair Turkish letters that were mis-decoded as Latin-1.

    Only the six characters that differ between Latin-1 and ISO-8859-9 are
    replaced; every other character (including correctly decoded Turkish
    letters such as ``ü``, ``ö``, ``ç`` and unrelated letters such as ``Ã``)
    is returned unchanged.

    Args:
        text: The value to repair. Non-string values are converted with
            ``str()`` first.

    Returns:
        The repaired string.
    """
    if not isinstance(text, str):
        text = str(text)
    # One pass over the string instead of one ``replace`` call per character.
    return text.translate(_LATIN1_TO_TURKISH)
def process_csv_file(file_path):
    """Rewrite one CSV file as UTF-8 with its Turkish characters repaired.

    The file is decoded with the first candidate encoding that reads without
    error, every cell (header included) is passed through
    ``fix_turkish_encoding``, and the result is written back as UTF-8.
    Blank lines are dropped.  The new content is written to a sibling
    ``<name>.tmp`` file and swapped in only after the write succeeded, so a
    failure never leaves the original truncated.

    Args:
        file_path: ``pathlib.Path`` of the CSV file to rewrite in place.

    Returns:
        True if the file was rewritten, False if it could not be read or
        written.
    """
    print(f"Processing {file_path.name}...")

    # 'latin-1' accepts every byte, so it never raises a decode error; the
    # entries after it are only reached when reading fails for another reason.
    encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1', 'windows-1252']

    for encoding in encodings:
        try:
            # newline='' lets the csv module handle line endings itself, so
            # newlines embedded in quoted fields are preserved.
            with open(file_path, 'r', encoding=encoding, newline='') as f:
                # Plain reader (not DictReader): keeps duplicate column
                # names and rows that are longer/shorter than the header.
                records = [
                    [fix_turkish_encoding(cell) for cell in record]
                    for record in csv.reader(f)
                    if record  # skip blank lines
                ]
        except UnicodeError:
            continue
        except Exception as e:
            print(f" ✗ Error with {encoding}: {e}")
            continue
        break
    else:
        print(f" ✗ Could not process {file_path.name}")
        return False

    # Writing is kept out of the encoding loop: a write failure has nothing
    # to do with the source encoding and must not trigger a re-read.
    tmp_path = file_path.with_name(file_path.name + '.tmp')
    try:
        with open(tmp_path, 'w', encoding='utf-8', newline='') as f:
            csv.writer(f).writerows(records)
        os.replace(tmp_path, file_path)
    except Exception as e:
        print(f" ✗ Error writing {file_path.name}: {e}")
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # temp file was never created or is already gone
        print(f" ✗ Could not process {file_path.name}")
        return False

    # The first record is the header; report data rows only.
    data_rows = max(len(records) - 1, 0)
    print(f" ✓ Fixed {data_rows} rows (was {encoding})")
    return True
def main():
    """Run the encoding fix over every ``*.csv`` file directly in ``data/csv``.

    The directory is resolved relative to the current working directory.
    Prints a progress line and a final summary; returns ``None`` in all cases.
    """
    target_dir = Path('data/csv')

    # Guard clause: nothing to do without the directory.
    if not target_dir.exists():
        print(f"Directory {target_dir} does not exist!")
        return

    candidates = [*target_dir.glob('*.csv')]
    total = len(candidates)
    if total == 0:
        print(f"No CSV files found in {target_dir}")
        return

    print(f"Found {total} CSV files to process...")

    # Count the files for which processing reported success.
    fixed = sum(1 for candidate in candidates if process_csv_file(candidate))

    print(f"\n✅ Successfully processed {fixed}/{total} CSV files")
# Run the batch fix only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment