Skip to content

Instantly share code, notes, and snippets.

@habil
Created December 22, 2025 10:20
Show Gist options
  • Select an option

  • Save habil/297f5f70c9b8ee39650ce165dd0c3e46 to your computer and use it in GitHub Desktop.

Select an option

Save habil/297f5f70c9b8ee39650ce165dd0c3e46 to your computer and use it in GitHub Desktop.
Fix Turkish character encoding issues in all CSV files.
#!/usr/bin/env python3
"""Fix Turkish character encoding issues in all CSV files."""
import csv
import os
from pathlib import Path
# The twelve non-ASCII letters of the Turkish alphabet, in the order
# u-umlaut, o-umlaut, c-cedilla, s-cedilla, g-breve, dotless i, then the
# matching capitals (the last one being dotted capital I).
# Written as escapes so this table cannot itself be damaged by the kind of
# encoding mishap the script repairs.
_TURKISH_LETTERS = (
    '\u00fc\u00f6\u00e7\u015f\u011f\u0131'
    '\u00dc\u00d6\u00c7\u015e\u011e\u0130'
)

# Two-character garbage produced when a UTF-8 encoded Turkish letter is decoded
# as cp1252 or Latin-1, mapped back to the real letter.  Deriving the keys from
# the codecs avoids hand-typed (and easily corrupted) mojibake literals.
_UTF8_MOJIBAKE = {
    letter.encode('utf-8').decode(codec): letter
    for letter in _TURKISH_LETTERS
    for codec in ('cp1252', 'latin-1')
}

# Single characters produced when ISO-8859-9 (Turkish) bytes are decoded as
# Latin-1/cp1252.  Only the six letters whose code points differ between the
# two charsets need fixing; the umlauts and c-cedilla decode correctly.
_LATIN5_MOJIBAKE = str.maketrans({
    letter.encode('iso-8859-9').decode('latin-1'): letter
    for letter in '\u015f\u011f\u0131\u015e\u011e\u0130'
})


def fix_turkish_encoding(text):
    """Repair mis-decoded Turkish letters in *text*.

    Two kinds of damage are undone:

    * UTF-8 bytes that were decoded as cp1252/Latin-1 (two-character
      sequences starting with A-tilde, A-ring or A-diaeresis);
    * ISO-8859-9 bytes that were decoded as Latin-1 (thorn, eth and
      y-acute, in both cases, standing in for s-cedilla, g-breve and the
      dotless/dotted i).

    Args:
        text: The value to repair.  Non-string values are converted with
            ``str()`` first.

    Returns:
        A new string with every recognised garbled sequence replaced by the
        intended Turkish letter.  Text without such sequences is returned
        unchanged.
    """
    if not isinstance(text, str):
        text = str(text)

    # Multi-character sequences go first.  Their replacements are real Turkish
    # letters, none of which is a key of either table, so no replacement can
    # create new work for a later step.
    for garbled, letter in _UTF8_MOJIBAKE.items():
        text = text.replace(garbled, letter)

    # One C-level pass for the single-character substitutions.
    return text.translate(_LATIN5_MOJIBAKE)
def _read_fixed_rows(file_path, encoding):
    """Return the non-empty rows of *file_path*, decoded and repaired per cell."""
    # newline='' is what the csv module expects; it keeps newlines embedded
    # in quoted fields intact instead of letting the text layer rewrite them.
    with open(file_path, 'r', encoding=encoding, newline='') as f:
        return [
            [fix_turkish_encoding(cell) if cell else cell for cell in row]
            for row in csv.reader(f)
            if row  # blank lines are dropped
        ]


def _replace_file_with_rows(file_path, rows):
    """Write *rows* as UTF-8 CSV to a sibling temp file, then swap it in."""
    tmp_path = file_path.with_name(file_path.name + '.tmp')
    try:
        with open(tmp_path, 'w', encoding='utf-8', newline='') as f:
            csv.writer(f).writerows(rows)
        # The original is only replaced once the new content is fully written,
        # so a failed write can never leave a truncated CSV behind.
        os.replace(tmp_path, file_path)
    except Exception:
        if tmp_path.exists():
            tmp_path.unlink()
        raise


def process_csv_file(file_path):
    """Repair Turkish characters in one CSV file and rewrite it as UTF-8.

    The file is decoded with the first encoding in the candidate list that
    does not raise a decode error, every non-empty cell (header included) is
    passed through ``fix_turkish_encoding``, and the result replaces the
    original file.  Rows are handled as plain lists, so duplicate column
    names and rows longer or shorter than the header are written back as
    they were read.  A header-only file keeps its header.

    Args:
        file_path: Path of the CSV file (``str`` or ``pathlib.Path``).

    Returns:
        ``True`` if the file was rewritten, ``False`` if it could not be
        read, decoded or written.  The original file is left untouched on
        failure.
    """
    file_path = Path(file_path)
    print(f"Processing {file_path.name}...")

    # Tried in order.  latin-1 accepts every byte sequence, so the entries
    # after it only matter if the list is reordered.
    encodings = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1', 'windows-1252']

    for encoding in encodings:
        try:
            rows = _read_fixed_rows(file_path, encoding)
        except UnicodeError:
            # Wrong guess for this file; try the next candidate.
            continue
        except (OSError, csv.Error) as e:
            # Not an encoding problem, so other encodings will not help.
            print(f" ✗ Error with {encoding}: {e}")
            break

        try:
            _replace_file_with_rows(file_path, rows)
        except (OSError, csv.Error) as e:
            print(f" ✗ Error with {encoding}: {e}")
            break

        # The first row is the header; report data rows only.
        data_row_count = len(rows) - 1 if rows else 0
        print(f" ✓ Fixed {data_row_count} rows (was {encoding})")
        return True

    print(f" ✗ Could not process {file_path.name}")
    return False
def main(csv_dir='data/csv'):
    """Fix every ``*.csv`` file directly inside *csv_dir*.

    Args:
        csv_dir: Directory to scan (``str`` or ``pathlib.Path``).  Defaults
            to ``data/csv`` relative to the current working directory.

    Prints a progress line per file and a final summary.  Returns ``None``;
    it returns early, after printing a message, when the directory is
    missing or holds no CSV files.
    """
    csv_dir = Path(csv_dir)
    # is_dir() rather than exists(): a regular file at this path cannot be
    # scanned and should be reported the same way as a missing directory.
    if not csv_dir.is_dir():
        print(f"Directory {csv_dir} does not exist!")
        return

    # Sorted so that files are processed in the same order on every run;
    # glob() itself yields them in filesystem order.
    csv_files = sorted(csv_dir.glob('*.csv'))
    if not csv_files:
        print(f"No CSV files found in {csv_dir}")
        return

    print(f"Found {len(csv_files)} CSV files to process...")

    success_count = 0
    for csv_file in csv_files:
        if process_csv_file(csv_file):
            success_count += 1

    print(f"\n✅ Successfully processed {success_count}/{len(csv_files)} CSV files")
# Run the fixer only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment