Skip to content

Instantly share code, notes, and snippets.

@kokoye2007
Last active August 26, 2025 14:05
Show Gist options
  • Select an option

  • Save kokoye2007/5530370a6c165e981321d2c8f3e262f2 to your computer and use it in GitHub Desktop.

Select an option

Save kokoye2007/5530370a6c165e981321d2c8f3e262f2 to your computer and use it in GitHub Desktop.
Keymagic for Normalization | Reorder
#!/usr/bin/env python3
import argparse
import pandas as pd
def to_codepoints(s: str) -> str:
    """Render *s* as a sequence of ``U``-prefixed hexadecimal codepoints.

    Each character becomes ``U`` followed by its codepoint in uppercase
    hexadecimal, zero-padded to at least four digits; the pieces are joined
    with ``" + "``. An empty string yields an empty string.

    >>> to_codepoints("ab")
    'U0061 + U0062'
    """
    # :04X pads to a minimum of four digits; codepoints above U+FFFF simply
    # use as many digits as they need.
    return " + ".join(f"U{ord(ch):04X}" for ch in s)
def main():
    """Convert a two-column CSV of text pairs into codepoint mapping rules.

    Reads the headerless CSV given by ``-i/--input`` (columns: source, target)
    and writes one line per row to the file given by ``-o/--output`` in the
    form ``<source codepoints> => <target codepoints> // <source> | <target>``.
    Lines are separated by newlines, with no trailing newline.
    """
    parser = argparse.ArgumentParser(description="Convert Myanmar text pairs to Unicode codepoints mapping")
    parser.add_argument("-i", "--input", required=True, help="Input CSV file (2 columns: source,target)")
    parser.add_argument("-o", "--output", required=True, help="Output TXT file")
    args = parser.parse_args()

    # Load CSV (expect 2 columns, no header).
    # Read every cell verbatim as text: without dtype=str / keep_default_na=False
    # pandas infers numeric types and turns empty cells (and tokens such as
    # "NA" or "null") into NaN, which str() would then render as the literal
    # text "nan" and corrupt the mapping. fillna("") covers any cell that is
    # still missing (e.g. a row with only one field).
    df = pd.read_csv(
        args.input,
        header=None,
        names=["source", "target"],
        dtype=str,
        keep_default_na=False,
    ).fillna("")

    # Convert and prepare lines: codepoint rule followed by the readable
    # original text as a trailing comment.
    lines = [
        f"{to_codepoints(src)} => {to_codepoints(tgt)} // {src} | {tgt}"
        for src, tgt in zip(df["source"], df["target"])
    ]

    # Write to TXT
    with open(args.output, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print(f"✅ Conversion complete. Output saved to {args.output}")
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment