Skip to content

Instantly share code, notes, and snippets.

@the-solipsist
Created February 5, 2026 20:51
Show Gist options
  • Select an option

  • Save the-solipsist/841fdb546df3763db49f2f706daff0f2 to your computer and use it in GitHub Desktop.

Select an option

Save the-solipsist/841fdb546df3763db49f2f706daff0f2 to your computer and use it in GitHub Desktop.
Script for conversion from {h}ledger prices to beancount prices
#!/usr/bin/env python3
"""
Convert price entries to Beancount format.
Usage: python convert_to_beancount.py prices.l > beancount_prices.bean
The converter handles:
- Currency symbol conversion (₹→INR, $→USD, £→GBP, €→EUR)
- Removal of commas from prices
- Quoted commodity names
- Multiple input formats (CURRENCY PRICE or PRICE CURRENCY)
- Commodity name normalization to meet Beancount rules:
* Converts to uppercase
* Prefixes with 'X' if starting with a number
* Replaces invalid characters with underscores
* Truncates to 24 characters max
Examples:
P 2020-03-07 "0P0000XV0N.BO" ₹5000.00 → 2020-03-07 price X0P0000XV0N.BO 5000.00 INR
P 2021-01-22 Points ₹0.15 → 2021-01-22 price POINTS 0.15 INR
P 2026-02-05 BTC USD 73,597.62 → 2026-02-05 price BTC 73597.62 USD
"""
import sys
import re
# Currency symbol mapping: currency symbol -> ISO currency code.
#
# Insertion order matters: convert_line() uses the first symbol (in this
# order) that occurs in a price token, and clean_price() strips the symbols in
# this order. A symbol that is a substring of another one (such as '$' inside
# 'L$') must therefore be listed AFTER the longer symbol.
CURRENCY_MAP = {
    # Original unique ones
    '₹': 'INR',  # Indian Rupee — unique
    '£': 'GBP',  # Pound Sterling — unique
    '€': 'EUR',  # Euro — unique
    '¥': 'JPY',  # Japanese Yen (primary association; CNY often uses ¥ in some contexts but we prioritize uniqueness)
    '₺': 'TRY',  # Turkish Lira — unique modern symbol
    '₽': 'RUB',  # Russian Ruble — unique
    '₪': 'ILS',  # Israeli New Shekel — unique
    '₩': 'KRW',  # South Korean Won — unique
    '฿': 'THB',  # Thai Baht — unique
    '₫': 'VND',  # Vietnamese Dong — unique
    '₴': 'UAH',  # Ukrainian Hryvnia — unique
    '₱': 'PHP',  # Philippine Peso — unique
    '₡': 'CRC',  # Costa Rican Colón — unique
    '₲': 'PYG',  # Paraguayan Guaraní — unique
    '﷼': 'SAR',  # Saudi Riyal (primary / most common unique use; some overlap but distinct in practice)
    'د.إ': 'AED',  # UAE Dirham — unique
    'S/': 'PEN',  # Peruvian Sol — unique
    'Kč': 'CZK',  # Czech Koruna — unique
    'zł': 'PLN',  # Polish Złoty — unique
    'lei': 'RON',  # Romanian Leu — unique
    'лв': 'BGN',  # Bulgarian Lev — unique
    '₸': 'KZT',  # Kazakhstani Tenge — unique
    '៛': 'KHR',  # Cambodian Riel — unique
    'ƒ': 'AWG',  # Aruban Florin — unique (also used historically for Netherlands Antillean guilder, now mostly AWG)
    '؋': 'AFN',  # Afghan Afghani — unique
    '₾': 'GEL',  # Georgian Lari — unique
    'B/.': 'PAB',  # Panamanian Balboa — unique
    'L$': 'LRD',  # Liberian Dollar (distinctive variant)
    'MT': 'MZN',  # Mozambican Metical (unique in this form)
    'R': 'ZAR',  # South African Rand — conventionally unique in most global contexts
    'Fr': 'CHF',  # Swiss Franc (primary and distinctive when written as Fr. or CHF)
    # Bare dollar sign defaults to the US Dollar, as promised by the module
    # docstring. Kept last so that 'L$' above is matched first.
    '$': 'USD',
}
def clean_price(price_str):
    """Return *price_str* without commas or known currency symbols.

    All commas are dropped first, then every key of ``CURRENCY_MAP`` is
    removed wherever it occurs (in the map's iteration order), and finally
    leading/trailing whitespace is trimmed.
    """
    # Splitting on ',' and re-joining drops every comma.
    cleaned = ''.join(price_str.split(','))
    for sym in CURRENCY_MAP.keys():
        cleaned = cleaned.replace(sym, '')
    cleaned = cleaned.strip()
    return cleaned
def normalize_commodity(commodity):
    """Normalize a commodity name to meet Beancount's currency syntax.

    Beancount rules:
    - All uppercase, at most 24 characters long
    - Must start with a capital letter (A-Z)
    - Must end with a capital letter or a digit
    - Middle characters can include: A-Z, 0-9, ', ., _, -

    Args:
        commodity: Raw commodity name, optionally wrapped in double quotes.

    Returns:
        The normalized name. If nothing is left after removing the quotes,
        the empty string is returned unchanged.
    """
    letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    punctuation = "'._-"
    valid_chars = set(letters + '0123456789' + punctuation)

    # Remove quotes if present and convert to uppercase
    commodity = commodity.strip('"').upper()
    if not commodity:
        return commodity

    # Replace any invalid characters with underscores
    commodity = ''.join(c if c in valid_chars else '_' for c in commodity)

    # The first character must be a capital letter. Prefix with 'X' when the
    # name starts with a digit, punctuation, or a replaced invalid character.
    if commodity[0] not in letters:
        commodity = 'X' + commodity

    # Truncate to 24 characters, then drop trailing punctuation: the last
    # character must be a letter or digit, and truncation itself can leave a
    # punctuation character at the end. The leading letter is never stripped,
    # so the result is never empty here.
    return commodity[:24].rstrip(punctuation)
def convert_currency(currency_str):
    """Translate a currency token into the code used in the output.

    Surrounding whitespace and double quotes are removed first. A token that
    is a key of ``CURRENCY_MAP`` is replaced by its mapped code; any other
    token is passed through ``normalize_commodity``.
    """
    token = currency_str.strip().strip('"')
    try:
        # Known currency symbol: use its mapped code.
        return CURRENCY_MAP[token]
    except KeyError:
        # Not a known symbol: treat it as a commodity name and normalize it.
        return normalize_commodity(token)
def convert_line(line):
    """Convert a single ``P`` price line to a Beancount ``price`` directive.

    Accepted shapes after the ``P `` prefix (tokens are split on whitespace,
    respecting quoted strings):

    1. ``DATE COMMODITY PRICE`` (currency symbol optionally embedded in PRICE)
    2. ``DATE COMMODITY PRICE CURRENCY``
    3. ``DATE COMMODITY CURRENCY PRICE``

    Args:
        line: One line of input text.

    Returns:
        ``None`` for blank lines and lines that do not start with ``P ``;
        a ``DATE price COMMODITY NUMBER CURRENCY`` string on success;
        otherwise a ``; ERROR: ...`` comment line (a comment in Beancount
        syntax) explaining why the line could not be converted.
    """
    import shlex

    def is_number(token):
        """Tell whether *token* parses as a number once commas/symbols are removed."""
        try:
            float(clean_price(token))
        except ValueError:
            return False
        return True

    line = line.strip()
    # Skip empty lines and anything that is not a price directive
    if not line.startswith('P '):
        return None
    # Remove the 'P ' prefix
    line = line[2:].strip()

    # Split by whitespace, but respect quoted strings
    try:
        parts = shlex.split(line)
    except ValueError:
        # shlex rejects unbalanced quotes (e.g. an apostrophe inside a name);
        # fall back to a plain whitespace split.
        parts = line.split()

    if len(parts) < 3:
        return f"; ERROR: Not enough parts: {line}"

    date = parts[0]
    # Convert and normalize commodity
    commodity = normalize_commodity(convert_currency(parts[1]))
    # The rest is price information
    remaining = parts[2:]

    if len(remaining) == 1:
        # Single token: price with embedded currency symbol or plain number
        price_str = remaining[0]
        if not is_number(price_str):
            # Emitting a non-numeric "price" would produce an invalid directive.
            return f"; ERROR: No numeric price: {line}"
        # First symbol (in CURRENCY_MAP order) contained in the token wins;
        # a plain number has no currency information at all.
        price_currency = next(
            (code for symbol, code in CURRENCY_MAP.items() if symbol in price_str),
            "UNKNOWN",
        )
        price_value = clean_price(price_str)
    elif len(remaining) == 2:
        first_token, second_token = remaining
        first_is_number = is_number(first_token)
        second_is_number = is_number(second_token)

        if first_is_number and not second_is_number:
            # Format: COMMODITY PRICE CURRENCY (e.g., BTC 73597.62 USD)
            price_value = clean_price(first_token)
            price_currency = convert_currency(second_token)
        elif second_is_number:
            # Format: COMMODITY CURRENCY PRICE (e.g., BTC USD 73597.62).
            # This also covers the case where both tokens look numeric: the
            # first one is then a (quoted) numeric-looking target commodity
            # such as "916", which must be normalized like any other name.
            price_currency = convert_currency(first_token)
            price_value = clean_price(second_token)
        else:
            # Neither token is a number, so there is no price to emit.
            return f"; ERROR: No numeric price: {line}"
    else:
        return f"; ERROR: Too many parts: {line}"

    # Convert to Beancount format
    return f"{date} price {commodity} {price_value} {price_currency}"
def main():
    """Read price lines and print the converted Beancount lines.

    Input comes from the file named by the first command-line argument
    (opened as UTF-8) or, when no argument is given, from standard input.
    Lines for which ``convert_line`` returns nothing are skipped.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        with open(cli_args[0], 'r', encoding='utf-8') as source:
            lines = source.readlines()
    else:
        lines = sys.stdin.readlines()

    for raw_line in lines:
        result = convert_line(raw_line)
        if result:
            print(result)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment