habil · December 22, 2025 10:20
diff --git a/gistfile1.txt b/gistfile1.txt
 #!/usr/bin/env python3
 """Fix Turkish character encoding issues in all CSV files."""

 import csv
 import os
 from pathlib import Path

 # Two-character debris left behind when the UTF-8 bytes of a letter were
 # decoded as latin-1 / cp1252.  The keys are spelled with escapes because
 # several of them end in an invisible C1 control character; if that character
 # is dropped the key collapses to a bare 'Ã' and every legitimate 'Ã' in the
 # data would be rewritten.
 # NOTE(review): the keys ending in \u009e, \u0090, \u0096 and \u0087 (and
 # their cp1252 spellings) are reconstructed from the UTF-8 encodings of
 # Þ, Ð, Ö and Ç, following the pattern of the lowercase entries -- confirm.
 _MOJIBAKE_PAIRS = (
    ('\u00c3\u00bc', 'ü'),  # Ã¼
    ('\u00c3\u00be', 'ş'),  # Ã¾
    ('\u00c3\u00b0', 'ğ'),  # Ã°
    ('\u00c3\u00bd', 'ı'),  # Ã½
    ('\u00c3\u00b6', 'ö'),  # Ã¶
    ('\u00c3\u00a7', 'ç'),  # Ã§
    ('\u00c3\u0153', 'Ü'),  # Ãœ (cp1252 spelling of bytes C3 9C)
    ('\u00c3\u009c', 'Ü'),  # latin-1 spelling of the same bytes
    ('\u00c3\u009e', 'Ş'),  # bytes C3 9E, latin-1 spelling
    ('\u00c3\u017e', 'Ş'),  # bytes C3 9E, cp1252 spelling
    ('\u00c3\u0090', 'Ğ'),  # bytes C3 90 (byte 0x90 has no cp1252 spelling)
    ('\u00c4\u00b0', 'İ'),  # Ä°
    ('\u00c3\u0096', 'Ö'),  # bytes C3 96, latin-1 spelling
    ('\u00c3\u2013', 'Ö'),  # bytes C3 96, cp1252 spelling
    ('\u00c3\u0087', 'Ç'),  # bytes C3 87, latin-1 spelling
    ('\u00c3\u2021', 'Ç'),  # bytes C3 87, cp1252 spelling
 )

 # Single characters produced when Turkish (ISO-8859-9 / cp1254) bytes are
 # decoded as latin-1.  Letters that decode to themselves need no entry.
 _LATIN1_TO_TURKISH = str.maketrans({
    '\u00fe': 'ş',  # þ
    '\u00f0': 'ğ',  # ð
    '\u00fd': 'ı',  # ý
    '\u00de': 'Ş',  # Þ
    '\u00d0': 'Ğ',  # Ð
    '\u00dd': 'İ',  # Ý: byte 0xDD is dotted capital İ, not ASCII 'I'
 })


 def fix_turkish_encoding(text):
    """Repair Turkish letters that were garbled by a wrong-encoding decode.

    Two-character mojibake sequences are replaced first, then the remaining
    single latin-1 look-alikes are translated in one pass.  Text without any
    of the known garbled forms is returned unchanged.

    Args:
        text: Value to repair.  Anything that is not a ``str`` is converted
            with ``str()`` first (so ``None`` becomes ``'None'``).

    Returns:
        The repaired string.
    """
    if not isinstance(text, str):
        text = str(text)

    # Pairs must go first: they contain none of the single characters below,
    # and none of the replacement letters can form a new pair.
    for garbled, letter in _MOJIBAKE_PAIRS:
        text = text.replace(garbled, letter)

    return text.translate(_LATIN1_TO_TURKISH)

 def process_csv_file(file_path):
    """Rewrite one CSV file as UTF-8 with garbled Turkish letters repaired.

    The file is decoded with the first candidate encoding that works, every
    cell (header row included) is passed through ``fix_turkish_encoding``,
    and only after the whole file has been read and repaired in memory is it
    overwritten.  A failure while reading therefore never damages the file.

    Rows are handled positionally, so duplicate column names, short or long
    rows and header-only files are written back with their shape intact.

    Args:
        file_path: ``pathlib.Path`` of the CSV file to rewrite in place.

    Returns:
        ``True`` if the file was rewritten, ``False`` if it could not be
        read or written.
    """
    print(f"Processing {file_path.name}...")

    # latin-1 assigns a character to every byte value, so decoding with it
    # cannot fail; any candidate listed after it would never be reached.
    encodings = ['utf-8', 'latin-1']

    for encoding in encodings:
        try:
            # newline='' leaves line-ending handling to the csv module so
            # line breaks inside quoted cells survive the round trip.
            with open(file_path, 'r', encoding=encoding, newline='') as f:
                fixed_rows = [
                    [fix_turkish_encoding(cell) for cell in row]
                    for row in csv.reader(f)
                ]
        except UnicodeError:
            # Wrong guess for this file; try the next candidate.
            continue
        except (OSError, csv.Error) as e:
            print(f"  ✗ Error with {encoding}: {e}")
            continue
        break
    else:
        print(f"  ✗ Could not process {file_path.name}")
        return False

    # The read handle is closed by now; write back as UTF-8.
    try:
        with open(file_path, 'w', encoding='utf-8', newline='') as f:
            csv.writer(f).writerows(fixed_rows)
    except OSError as e:
        print(f"  ✗ Error writing {file_path.name}: {e}")
        return False

    # Report data rows only: blank lines and the header row are not counted.
    data_rows = max(sum(1 for row in fixed_rows if row) - 1, 0)
    print(f"  ✓ Fixed {data_rows} rows (was {encoding})")
    return True

 def main():
    """Run the encoding repair over every ``*.csv`` file directly inside ``data/csv``.

    Prints a message and returns early when the directory is missing or holds
    no CSV files; otherwise processes each file and prints a summary count.
    """
    target_dir = Path('data/csv')

    if not target_dir.exists():
        print(f"Directory {target_dir} does not exist!")
        return

    found = list(target_dir.glob('*.csv'))
    if not found:
        print(f"No CSV files found in {target_dir}")
        return

    total = len(found)
    print(f"Found {total} CSV files to process...")

    # Every file is attempted; a failure on one does not stop the others.
    outcomes = [process_csv_file(path) for path in found]
    fixed = sum(1 for ok in outcomes if ok)

    print(f"\n✅ Successfully processed {fixed}/{total} CSV files")

 # Run the batch repair only when executed as a script, not when imported.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	"""Fix Turkish character encoding issues in all CSV files."""

	import csv
	import os
	from pathlib import Path

	# Two-character debris left behind when the UTF-8 bytes of a letter were
	# decoded as latin-1 / cp1252.  The keys are spelled with escapes because
	# several of them end in an invisible C1 control character; if that character
	# is dropped the key collapses to a bare 'Ã' and every legitimate 'Ã' in the
	# data would be rewritten.
	# NOTE(review): the keys ending in \u009e, \u0090, \u0096 and \u0087 (and
	# their cp1252 spellings) are reconstructed from the UTF-8 encodings of
	# Þ, Ð, Ö and Ç, following the pattern of the lowercase entries -- confirm.
	_MOJIBAKE_PAIRS = (
	    ('\u00c3\u00bc', 'ü'),  # Ã¼
	    ('\u00c3\u00be', 'ş'),  # Ã¾
	    ('\u00c3\u00b0', 'ğ'),  # Ã°
	    ('\u00c3\u00bd', 'ı'),  # Ã½
	    ('\u00c3\u00b6', 'ö'),  # Ã¶
	    ('\u00c3\u00a7', 'ç'),  # Ã§
	    ('\u00c3\u0153', 'Ü'),  # Ãœ (cp1252 spelling of bytes C3 9C)
	    ('\u00c3\u009c', 'Ü'),  # latin-1 spelling of the same bytes
	    ('\u00c3\u009e', 'Ş'),  # bytes C3 9E, latin-1 spelling
	    ('\u00c3\u017e', 'Ş'),  # bytes C3 9E, cp1252 spelling
	    ('\u00c3\u0090', 'Ğ'),  # bytes C3 90 (byte 0x90 has no cp1252 spelling)
	    ('\u00c4\u00b0', 'İ'),  # Ä°
	    ('\u00c3\u0096', 'Ö'),  # bytes C3 96, latin-1 spelling
	    ('\u00c3\u2013', 'Ö'),  # bytes C3 96, cp1252 spelling
	    ('\u00c3\u0087', 'Ç'),  # bytes C3 87, latin-1 spelling
	    ('\u00c3\u2021', 'Ç'),  # bytes C3 87, cp1252 spelling
	)

	# Single characters produced when Turkish (ISO-8859-9 / cp1254) bytes are
	# decoded as latin-1.  Letters that decode to themselves need no entry.
	_LATIN1_TO_TURKISH = str.maketrans({
	    '\u00fe': 'ş',  # þ
	    '\u00f0': 'ğ',  # ð
	    '\u00fd': 'ı',  # ý
	    '\u00de': 'Ş',  # Þ
	    '\u00d0': 'Ğ',  # Ð
	    '\u00dd': 'İ',  # Ý: byte 0xDD is dotted capital İ, not ASCII 'I'
	})


	def fix_turkish_encoding(text):
	    """Repair Turkish letters that were garbled by a wrong-encoding decode.

	    Two-character mojibake sequences are replaced first, then the remaining
	    single latin-1 look-alikes are translated in one pass.  Text without any
	    of the known garbled forms is returned unchanged.

	    Args:
	        text: Value to repair.  Anything that is not a ``str`` is converted
	            with ``str()`` first (so ``None`` becomes ``'None'``).

	    Returns:
	        The repaired string.
	    """
	    if not isinstance(text, str):
	        text = str(text)

	    # Pairs must go first: they contain none of the single characters below,
	    # and none of the replacement letters can form a new pair.
	    for garbled, letter in _MOJIBAKE_PAIRS:
	        text = text.replace(garbled, letter)

	    return text.translate(_LATIN1_TO_TURKISH)

	def process_csv_file(file_path):
	    """Rewrite one CSV file as UTF-8 with garbled Turkish letters repaired.

	    The file is decoded with the first candidate encoding that works, every
	    cell (header row included) is passed through ``fix_turkish_encoding``,
	    and only after the whole file has been read and repaired in memory is it
	    overwritten.  A failure while reading therefore never damages the file.

	    Rows are handled positionally, so duplicate column names, short or long
	    rows and header-only files are written back with their shape intact.

	    Args:
	        file_path: ``pathlib.Path`` of the CSV file to rewrite in place.

	    Returns:
	        ``True`` if the file was rewritten, ``False`` if it could not be
	        read or written.
	    """
	    print(f"Processing {file_path.name}...")

	    # latin-1 assigns a character to every byte value, so decoding with it
	    # cannot fail; any candidate listed after it would never be reached.
	    encodings = ['utf-8', 'latin-1']

	    for encoding in encodings:
	        try:
	            # newline='' leaves line-ending handling to the csv module so
	            # line breaks inside quoted cells survive the round trip.
	            with open(file_path, 'r', encoding=encoding, newline='') as f:
	                fixed_rows = [
	                    [fix_turkish_encoding(cell) for cell in row]
	                    for row in csv.reader(f)
	                ]
	        except UnicodeError:
	            # Wrong guess for this file; try the next candidate.
	            continue
	        except (OSError, csv.Error) as e:
	            print(f" ✗ Error with {encoding}: {e}")
	            continue
	        break
	    else:
	        print(f" ✗ Could not process {file_path.name}")
	        return False

	    # The read handle is closed by now; write back as UTF-8.
	    try:
	        with open(file_path, 'w', encoding='utf-8', newline='') as f:
	            csv.writer(f).writerows(fixed_rows)
	    except OSError as e:
	        print(f" ✗ Error writing {file_path.name}: {e}")
	        return False

	    # Report data rows only: blank lines and the header row are not counted.
	    data_rows = max(sum(1 for row in fixed_rows if row) - 1, 0)
	    print(f" ✓ Fixed {data_rows} rows (was {encoding})")
	    return True

	def main():
	    """Fix all CSV files in the data/csv directory.

	    Prints a message and returns early when the directory is missing or
	    holds no ``*.csv`` files; otherwise runs ``process_csv_file`` on each
	    file and prints how many were processed successfully.
	    """
	    csv_dir = Path('data/csv')

	    if not csv_dir.exists():
	        print(f"Directory {csv_dir} does not exist!")
	        return

	    csv_files = list(csv_dir.glob('*.csv'))
	    if not csv_files:
	        print(f"No CSV files found in {csv_dir}")
	        return

	    print(f"Found {len(csv_files)} CSV files to process...")

	    # Every file is attempted; a failure on one does not stop the others.
	    success_count = 0
	    for csv_file in csv_files:
	        if process_csv_file(csv_file):
	            success_count += 1

	    print(f"\n✅ Successfully processed {success_count}/{len(csv_files)} CSV files")

	# Run the batch repair only when executed as a script, not when imported.
	if __name__ == '__main__':
	    main()
No results found