Created
February 5, 2026 20:51
-
-
Save the-solipsist/841fdb546df3763db49f2f706daff0f2 to your computer and use it in GitHub Desktop.
Script for conversion from {h}ledger prices to beancount prices
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| """ | |
| Convert price entries to Beancount format. | |
| Usage: python convert_to_beancount.py prices.l > beancount_prices.bean | |
| The converter handles: | |
| - Currency symbol conversion (₹→INR, $→USD, £→GBP, €→EUR) | |
| - Removal of commas from prices | |
| - Quoted commodity names | |
| - Multiple input formats (CURRENCY PRICE or PRICE CURRENCY) | |
| - Commodity name normalization to meet Beancount rules: | |
| * Converts to uppercase | |
| * Prefixes with 'X' if starting with a number | |
| * Replaces invalid characters with underscores | |
| * Truncates to 24 characters max | |
| Examples: | |
| P 2020-03-07 "0P0000XV0N.BO" ₹5000.00 → 2020-03-07 price X0P0000XV0N.BO 5000.00 INR | |
| P 2021-01-22 Points ₹0.15 → 2021-01-22 price POINTS 0.15 INR | |
| P 2026-02-05 BTC USD 73,597.62 → 2026-02-05 price BTC 73597.62 USD | |
| """ | |
| import sys | |
| import re | |
# Currency symbol -> ISO 4217 code mapping.
#
# Ordering matters: consumers scan this dict in insertion order and match
# symbols by substring, so a symbol that is contained in another one
# (e.g. '$' inside 'L$') must come AFTER the longer symbol.
CURRENCY_MAP = {
    # Original unique ones
    '₹': 'INR',  # Indian Rupee — unique
    '£': 'GBP',  # Pound Sterling — unique
    '€': 'EUR',  # Euro — unique
    '¥': 'JPY',  # Japanese Yen (primary association; CNY often uses ¥ in some contexts but we prioritize uniqueness)
    '₺': 'TRY',  # Turkish Lira — unique modern symbol
    '₽': 'RUB',  # Russian Ruble — unique
    '₪': 'ILS',  # Israeli New Shekel — unique
    '₩': 'KRW',  # South Korean Won — unique
    '฿': 'THB',  # Thai Baht — unique
    '₫': 'VND',  # Vietnamese Dong — unique
    '₴': 'UAH',  # Ukrainian Hryvnia — unique
    '₱': 'PHP',  # Philippine Peso — unique
    '₡': 'CRC',  # Costa Rican Colón — unique
    '₲': 'PYG',  # Paraguayan Guaraní — unique
    '﷼': 'SAR',  # Saudi Riyal (primary / most common unique use; some overlap but distinct in practice)
    'د.إ': 'AED',  # UAE Dirham — unique
    'S/': 'PEN',  # Peruvian Sol — unique
    'Kč': 'CZK',  # Czech Koruna — unique
    'zł': 'PLN',  # Polish Złoty — unique
    'lei': 'RON',  # Romanian Leu — unique
    'лв': 'BGN',  # Bulgarian Lev — unique
    '₸': 'KZT',  # Kazakhstani Tenge — unique
    '៛': 'KHR',  # Cambodian Riel — unique
    'ƒ': 'AWG',  # Aruban Florin — unique (also used historically for Netherlands Antillean guilder, now mostly AWG)
    '؋': 'AFN',  # Afghan Afghani — unique
    '₾': 'GEL',  # Georgian Lari — unique
    'B/.': 'PAB',  # Panamanian Balboa — unique
    'L$': 'LRD',  # Liberian Dollar (distinctive variant)
    'MT': 'MZN',  # Mozambican Metical (unique in this form)
    'R': 'ZAR',  # South African Rand — conventionally unique in most global contexts
    'Fr': 'CHF',  # Swiss Franc (primary and distinctive when written as Fr. or CHF)
    # Bare dollar sign: the module docstring promises $→USD. Kept last so
    # that the longer 'L$' entry above is matched first.
    '$': 'USD',  # US Dollar (default reading of a bare '$')
}
def clean_price(price_str):
    """Return ``price_str`` without commas and without known currency symbols.

    Every key of ``CURRENCY_MAP`` is deleted wherever it occurs in the
    string, then surrounding whitespace is trimmed.
    """
    # Dropping the thousands separators: split on commas and glue back.
    cleaned = ''.join(price_str.split(','))

    # Erase each known currency symbol, in the map's own order.
    for currency_symbol in CURRENCY_MAP:
        cleaned = cleaned.replace(currency_symbol, '')

    return cleaned.strip()
def normalize_commodity(commodity):
    """Normalize a commodity name so it satisfies Beancount's naming rules.

    Rules enforced here:
    - Only A-Z, 0-9, ', ., _ and - may appear
    - The first character must be a capital letter
    - The last character must be a capital letter or a digit
    - At most 24 characters long

    Args:
        commodity: Raw commodity name, optionally wrapped in double quotes.

    Returns:
        The normalized name. If nothing is left after removing the quotes,
        an empty string is returned because there is no name to repair.
    """
    # Remove surrounding quotes and convert to uppercase
    commodity = commodity.strip('"').upper()
    if not commodity:
        return commodity

    # Replace any invalid characters with underscores
    valid_chars = set("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'._-")
    commodity = ''.join(c if c in valid_chars else '_' for c in commodity)

    # Prefix with 'X' whenever the name does not start with a capital letter.
    # This covers leading digits as well as leading punctuation such as the
    # underscore produced by replacing an invalid first character.
    if not ('A' <= commodity[0] <= 'Z'):
        commodity = 'X' + commodity

    # Truncate first, then drop trailing punctuation: truncation itself can
    # expose a '.', '_', '-' or "'" at the end. The first character is a
    # letter at this point, so the result can never become empty.
    return commodity[:24].rstrip("'._-")
def convert_currency(currency_str):
    """Translate a currency token into its ISO code or a normalized name.

    Surrounding whitespace and double quotes are removed first. A token that
    is exactly a key of ``CURRENCY_MAP`` yields the mapped code; anything
    else is passed through ``normalize_commodity``.
    """
    token = currency_str.strip().strip('"')

    # Exact symbol match wins over name normalization.
    iso_code = CURRENCY_MAP.get(token)
    if iso_code is not None:
        return iso_code

    return normalize_commodity(token)
| def convert_line(line): | |
| """Convert a single price line to Beancount format.""" | |
| line = line.strip() | |
| # Skip empty lines | |
| if not line: | |
| return None | |
| # Check if line starts with P | |
| if not line.startswith('P '): | |
| return None | |
| # Remove the 'P ' prefix | |
| line = line[2:].strip() | |
| # Split by whitespace, but respect quoted strings | |
| import shlex | |
| try: | |
| parts = shlex.split(line) | |
| except: | |
| # Fallback to simple split if shlex fails | |
| parts = line.split() | |
| if len(parts) < 3: | |
| return f"; ERROR: Not enough parts: {line}" | |
| date = parts[0] | |
| commodity_raw = parts[1] | |
| # Convert and normalize commodity | |
| commodity = normalize_commodity(convert_currency(commodity_raw)) | |
| # The rest is price information | |
| remaining = parts[2:] | |
| # Handle different formats: | |
| # 1. COMMODITY PRICE (with currency symbol in price) | |
| # 2. COMMODITY PRICE CURRENCY | |
| # 3. COMMODITY "PRICE_COMMODITY" NUMBER | |
| if len(remaining) == 1: | |
| # Single token: price with embedded currency symbol or plain number | |
| price_str = remaining[0] | |
| # Check if it contains a currency symbol | |
| found_currency = None | |
| for symbol, code in CURRENCY_MAP.items(): | |
| if symbol in price_str: | |
| found_currency = code | |
| break | |
| if found_currency: | |
| price_value = clean_price(price_str) | |
| price_currency = found_currency | |
| else: | |
| # Plain number, assume UNKNOWN or keep as is | |
| price_value = clean_price(price_str) | |
| price_currency = "UNKNOWN" | |
| elif len(remaining) == 2: | |
| # Two tokens: either CURRENCY PRICE or "TARGET_COMMODITY" NUMBER | |
| first_token = remaining[0] | |
| second_token = remaining[1] | |
| # Try to determine which token is the price (numeric) and which is currency | |
| first_is_number = False | |
| second_is_number = False | |
| try: | |
| float(clean_price(first_token)) | |
| first_is_number = True | |
| except: | |
| pass | |
| try: | |
| float(clean_price(second_token)) | |
| second_is_number = True | |
| except: | |
| pass | |
| if first_is_number and not second_is_number: | |
| # Format: COMMODITY PRICE CURRENCY (e.g., BTC 73597.62 USD) | |
| price_value = clean_price(first_token) | |
| price_currency = convert_currency(second_token) | |
| elif not first_is_number and second_is_number: | |
| # Format: COMMODITY CURRENCY PRICE (e.g., BTC USD 73597.62) | |
| price_currency = convert_currency(first_token) | |
| price_value = clean_price(second_token) | |
| elif first_is_number and second_is_number: | |
| # Both are numbers - this is the "COMMODITY" NUMBER format | |
| # e.g., "GOLD916" 10 or "0P0000XV0N.BO" 1.0000 | |
| # In this case, first token should have been the target commodity | |
| # This shouldn't happen with our current parsing, but handle it | |
| price_value = second_token | |
| price_currency = first_token | |
| else: | |
| # Neither is clearly a number - treat as PRICE CURRENCY | |
| price_value = clean_price(first_token) | |
| price_currency = convert_currency(second_token) | |
| else: | |
| return f"; ERROR: Too many parts: {line}" | |
| # Convert to Beancount format | |
| return f"{date} price {commodity} {price_value} {price_currency}" | |
def main():
    """Convert every input line and print each non-empty result.

    Input comes from the file named by the first command-line argument
    (read as UTF-8) when one is given, otherwise from standard input.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        with open(cli_args[0], 'r', encoding='utf-8') as source:
            raw_lines = source.readlines()
    else:
        raw_lines = sys.stdin.readlines()

    for raw_line in raw_lines:
        result = convert_line(raw_line)
        # Skipped lines come back as None and produce no output.
        if result:
            print(result)


# Script entry point: run the conversion only when executed directly.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment