Skip to content

Instantly share code, notes, and snippets.

@dalepotter
Last active December 20, 2025 15:22
Show Gist options
  • Select an option

  • Save dalepotter/3cf719dbf21ec881e48ffda105bb3553 to your computer and use it in GitHub Desktop.

Select an option

Save dalepotter/3cf719dbf21ec881e48ffda105bb3553 to your computer and use it in GitHub Desktop.
Find which transactions sum closest to a target value (subset sum with dynamic programming)
# Test using `$ pytest closest_subset_sum.py`
# TODO
# - Strip `£` from prices if it exists
# - Normalise case differences in column headers: `date` vs. `Date`
import argparse
import csv
import os
import sys
import tempfile
import unittest
from io import StringIO
def find_best_subset(items, target):
"""
Find the subset of items that gets closest to target without exceeding it.
Uses dynamic programming (0/1 knapsack approach).
Args:
items: List of dicts with 'date', 'description', 'amount'
target: Target value (float)
Returns:
Tuple of (best_total, set of indices to include)
"""
# Work in pence to avoid floating point issues
target_pence = round(target * 100)
num_items = len(items)
# Convert amounts to pence
amounts_pence = [round(item['amount'] * 100) for item in items]
# Dynamic programming (DP) table: dp[i][w] = maximum value achievable using first i items with capacity w
dp = [[0 for _ in range(target_pence + 1)] for _ in range(num_items + 1)]
# Build the DP table
for item_index in range(1, num_items + 1):
item_value = amounts_pence[item_index - 1]
for capacity in range(target_pence + 1):
# Don't include this item
dp[item_index][capacity] = dp[item_index - 1][capacity]
# Try including this item
if item_value <= capacity:
with_item = dp[item_index - 1][capacity - item_value] + item_value
if with_item > dp[item_index][capacity]:
dp[item_index][capacity] = with_item
# Backtrack to find which items were selected
best_value_pence = dp[num_items][target_pence]
selected_indices = set()
remaining_capacity = target_pence
for item_index in range(num_items, 0, -1):
# Check if this item was included
if remaining_capacity >= amounts_pence[item_index - 1] and dp[item_index][remaining_capacity] == dp[item_index - 1][remaining_capacity - amounts_pence[item_index - 1]] + amounts_pence[item_index - 1]:
selected_indices.add(item_index - 1)
remaining_capacity -= amounts_pence[item_index - 1]
best_total = round(best_value_pence / 100, 2)
return best_total, selected_indices
def main(argv=None):
# Parse command line arguments
parser = argparse.ArgumentParser(
description='Find the subset of transactions that gets closest to a target amount without exceeding it.'
)
parser.add_argument(
'csv_filepath',
help="Path to CSV file containing transactions. CSV must have columns: 'date', 'description', and 'amount'."
)
parser.add_argument(
'target',
nargs='?',
type=float,
default=150.00,
help='Target amount to reach (default: 150.00)'
)
args = parser.parse_args(argv)
csv_filename = args.csv_filepath
target = args.target
# Read CSV file
items = []
try:
with open(csv_filename, 'r', encoding='utf-8') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
items.append({
'date': row['date'].strip(),
'description': row['description'].strip(),
'amount': float(row['amount'].strip())
})
except FileNotFoundError:
print(f"Error: File '{csv_filename}' not found.")
sys.exit(1)
except Exception as error:
print(f"Error reading CSV: {error}")
sys.exit(1)
if not items:
print("No transactions found in CSV file.")
sys.exit(1)
# Find best subset
best_total, selected_indices = find_best_subset(items, target)
# Display results
print(f"\nTarget: £{target:.2f}")
print(f"Best achievable total: £{best_total:.2f}")
print(f"Under target by: £{target - best_total:.2f}")
print(f"\n{'Date':<15} {'Description':<35} {'Amount':>10} {'Include':>10}")
print("-" * 75)
for index, item in enumerate(items):
include = "Yes" if index in selected_indices else "No"
print(f"{item['date']:<15} {item['description']:<35} £{item['amount']:>8.2f} {include:>10}")
print("-" * 75)
print(f"{'TOTAL':<51} £{best_total:>8.2f}")
print(f"\nTransactions included: {len(selected_indices)} of {len(items)}")
if __name__ == "__main__":
main()
class FindBestSubsetTests(unittest.TestCase):
def test_exact_match_single_item(self):
"""Single item that exactly matches target should be selected."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 50.00}
]
total, indices = find_best_subset(items, 50.00)
self.assertEqual(total, 50.00)
self.assertEqual(indices, {0})
def test_exact_match_multiple_items(self):
"""Multiple items that sum to target exactly should all be selected."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 25.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 25.00},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 50.00}
]
total, indices = find_best_subset(items, 100.00)
self.assertEqual(total, 100.00)
self.assertEqual(len(indices), 3)
def test_no_items(self):
"""Empty list should return 0 and empty set."""
items = []
total, indices = find_best_subset(items, 100.00)
self.assertEqual(total, 0.00)
self.assertEqual(indices, set())
def test_all_items_exceed_target(self):
"""When all items exceed target, should return 0."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 100.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 200.00}
]
total, indices = find_best_subset(items, 50.00)
self.assertEqual(total, 0.00)
self.assertEqual(indices, set())
def test_closest_without_exceeding(self):
"""Should find closest sum without exceeding target."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 30.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 40.00},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 50.00}
]
total, indices = find_best_subset(items, 75.00)
# Best is 30 + 40 = 70 (not 50 + 30 = 80 which exceeds)
self.assertEqual(total, 70.00)
self.assertEqual(len(indices), 2)
def test_single_item_under_target(self):
"""Single item below target should be selected."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 30.00}
]
total, indices = find_best_subset(items, 100.00)
self.assertEqual(total, 30.00)
self.assertEqual(indices, {0})
def test_floating_point_precision(self):
"""Pence conversion should handle floating point correctly."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.99},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 20.50},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 15.51}
]
total, indices = find_best_subset(items, 32.00)
# Best is 10.99 + 20.50 = 31.49
self.assertEqual(total, 31.49)
self.assertEqual(len(indices), 2)
def test_all_items_fit_under_target(self):
"""When sum of all items is less than target, select all."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 20.00},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 30.00}
]
total, indices = find_best_subset(items, 100.00)
self.assertEqual(total, 60.00)
self.assertEqual(len(indices), 3)
def test_returns_correct_indices(self):
"""Verify returned indices map to correct items."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 25.00},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 15.00}
]
total, indices = find_best_subset(items, 40.00)
# Should select items at indices 1 and 2 (25 + 15 = 40)
selected_sum = sum(items[idx]['amount'] for idx in indices)
self.assertEqual(total, selected_sum)
self.assertEqual(total, 40.00)
class EdgeCaseTests(unittest.TestCase):
def test_target_zero(self):
"""Target of 0 should return 0 and empty set."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 20.00}
]
total, indices = find_best_subset(items, 0.00)
self.assertEqual(total, 0.00)
self.assertEqual(indices, set())
def test_very_small_amounts(self):
"""Test pence conversion with very small amounts like 0.01."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 0.01},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 0.02},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 0.03}
]
total, indices = find_best_subset(items, 0.05)
# Best is 0.02 + 0.03 = 0.05
self.assertEqual(total, 0.05)
self.assertEqual(len(indices), 2)
def test_single_penny_precision(self):
"""Test that single penny differences are handled correctly."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 1.23},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 4.56},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 7.89}
]
total, indices = find_best_subset(items, 5.79)
# Best is 1.23 + 4.56 = 5.79 (exact match)
self.assertEqual(total, 5.79)
self.assertEqual(len(indices), 2)
def test_many_small_items(self):
"""Test with many small items to ensure algorithm handles larger sets."""
items = [{'date': f'2024-01-{item_num:02d}', 'description': f'Item {item_num}', 'amount': 1.00}
for item_num in range(1, 21)]
total, indices = find_best_subset(items, 15.00)
self.assertEqual(total, 15.00)
self.assertEqual(len(indices), 15)
def test_duplicate_amounts(self):
"""Test with duplicate amounts to ensure correct selection."""
items = [
{'date': '2024-01-01', 'description': 'Item 1', 'amount': 10.00},
{'date': '2024-01-02', 'description': 'Item 2', 'amount': 10.00},
{'date': '2024-01-03', 'description': 'Item 3', 'amount': 10.00}
]
total, indices = find_best_subset(items, 25.00)
# Should select 2 items totaling 20.00 (can't reach 25.00)
self.assertEqual(total, 20.00)
self.assertEqual(len(indices), 2)
class CommandLineArgumentTests(unittest.TestCase):
def test_default_target_value(self):
"""When only CSV file is provided, target should default to 150.00."""
# Create a temporary CSV file
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv') as f:
f.write('date,description,amount\n')
f.write('2024-01-01,Test,50.00\n')
temp_csv = f.name
try:
parser = argparse.ArgumentParser()
parser.add_argument('csv_filepath')
parser.add_argument('target', nargs='?', type=float, default=150.00)
args = parser.parse_args([temp_csv])
self.assertEqual(args.csv_filepath, temp_csv)
self.assertEqual(args.target, 150.00)
finally:
os.unlink(temp_csv)
def test_custom_target_value(self):
"""When both CSV file and target are provided, target should be set."""
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv') as f:
f.write('date,description,amount\n')
f.write('2024-01-01,Test,50.00\n')
temp_csv = f.name
try:
parser = argparse.ArgumentParser()
parser.add_argument('csv_filepath')
parser.add_argument('target', nargs='?', type=float, default=150.00)
args = parser.parse_args([temp_csv, '200.00'])
self.assertEqual(args.csv_filepath, temp_csv)
self.assertEqual(args.target, 200.00)
finally:
os.unlink(temp_csv)
def test_target_as_integer(self):
"""Target should accept integer values and convert to float."""
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv') as f:
f.write('date,description,amount\n')
temp_csv = f.name
try:
parser = argparse.ArgumentParser()
parser.add_argument('csv_filepath')
parser.add_argument('target', nargs='?', type=float, default=150.00)
args = parser.parse_args([temp_csv, '100'])
self.assertEqual(args.target, 100.00)
self.assertIsInstance(args.target, float)
finally:
os.unlink(temp_csv)
def test_end_to_end_with_default_target(self):
"""End-to-end test: main() with CSV file and default target."""
# Create a temporary CSV file with test data
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv') as f:
f.write('date,description,amount\n')
f.write('2024-01-01,Item A,50.00\n')
f.write('2024-01-02,Item B,60.00\n')
f.write('2024-01-03,Item C,70.00\n')
temp_csv = f.name
try:
# Capture stdout
old_stdout = sys.stdout
sys.stdout = StringIO()
# Run main with just the CSV file (should use default target of 150.00)
main([temp_csv])
output = sys.stdout.getvalue()
sys.stdout = old_stdout
# Verify output contains expected information
self.assertIn('Target: £150.00', output)
self.assertIn('Best achievable total:', output)
self.assertIn('Item A', output)
self.assertIn('Item B', output)
self.assertIn('Item C', output)
finally:
os.unlink(temp_csv)
def test_end_to_end_with_custom_target(self):
"""End-to-end test: main() with CSV file and custom target."""
# Create a temporary CSV file with test data
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv') as f:
f.write('date,description,amount\n')
f.write('2024-01-01,Item A,25.00\n')
f.write('2024-01-02,Item B,30.00\n')
f.write('2024-01-03,Item C,45.00\n')
temp_csv = f.name
try:
# Capture stdout
old_stdout = sys.stdout
sys.stdout = StringIO()
# Run main with CSV file and custom target
main([temp_csv, '75.00'])
output = sys.stdout.getvalue()
sys.stdout = old_stdout
# Verify output contains expected information
self.assertIn('Target: £75.00', output)
self.assertIn('Best achievable total: £75.00', output) # Should be exact match (30 + 45)
finally:
os.unlink(temp_csv)
def test_end_to_end_file_not_found(self):
"""End-to-end test: main() should exit gracefully when file not found."""
old_stdout = sys.stdout
sys.stdout = StringIO()
with self.assertRaises(SystemExit) as cm:
main(['nonexistent_file.csv'])
output = sys.stdout.getvalue()
sys.stdout = old_stdout
self.assertEqual(cm.exception.code, 1)
self.assertIn('not found', output)
class IntegrationTests(unittest.TestCase):
def test_realistic_expense_scenario(self):
"""Test with realistic transaction data."""
items = [
{'date': '2024-01-05', 'description': 'Grocery Store', 'amount': 45.67},
{'date': '2024-01-08', 'description': 'Gas Station', 'amount': 52.30},
{'date': '2024-01-12', 'description': 'Restaurant', 'amount': 28.50},
{'date': '2024-01-15', 'description': 'Pharmacy', 'amount': 15.99},
{'date': '2024-01-20', 'description': 'Coffee Shop', 'amount': 8.75},
{'date': '2024-01-25', 'description': 'Bookstore', 'amount': 32.40}
]
target = 150.00
total, indices = find_best_subset(items, target)
# Verify total doesn't exceed target
self.assertLessEqual(total, target)
# Verify all selected items sum correctly
selected_sum = round(sum(items[idx]['amount'] for idx in indices), 2)
self.assertEqual(total, selected_sum)
# For this data, best should be 149.61 (all except one item)
self.assertGreater(total, 140.00)
def test_subset_choice_optimization(self):
"""Verify algorithm chooses optimal subset over greedy approach."""
items = [
{'date': '2024-01-01', 'description': 'Large item', 'amount': 60.00},
{'date': '2024-01-02', 'description': 'Medium 1', 'amount': 35.00},
{'date': '2024-01-03', 'description': 'Medium 2', 'amount': 35.00}
]
target = 70.00
total, indices = find_best_subset(items, target)
# Greedy might pick 60, but optimal is 35 + 35 = 70
self.assertEqual(total, 70.00)
self.assertEqual(len(indices), 2)
def test_mixed_currency_values(self):
"""Test with various realistic currency amounts."""
items = [
{'date': '2024-02-01', 'description': 'Subscription', 'amount': 9.99},
{'date': '2024-02-03', 'description': 'Utilities', 'amount': 123.45},
{'date': '2024-02-07', 'description': 'Insurance', 'amount': 87.50},
{'date': '2024-02-14', 'description': 'Internet', 'amount': 55.00},
{'date': '2024-02-20', 'description': 'Phone', 'amount': 45.00}
]
target = 200.00
total, indices = find_best_subset(items, target)
self.assertLessEqual(total, target)
# Verify indices are valid
for idx in indices:
self.assertLess(idx, len(items))
self.assertGreaterEqual(idx, 0)
# Verify sum
selected_sum = sum(items[idx]['amount'] for idx in indices)
self.assertAlmostEqual(total, selected_sum, places=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment