111 lines
4.5 KiB
Python
111 lines
4.5 KiB
Python
|
|
import subprocess
|
|
import re
|
|
import csv
|
|
import os
|
|
from collections import defaultdict
|
|
|
|
def categorize_amex_transaction(description):
    """Map a transaction description to a spending category.

    The description is lower-cased and compared against each category's
    keyword list in the order the categories are listed below.  The first
    category owning a keyword that occurs anywhere in the description
    wins; ``'Other'`` is returned when nothing matches.
    """
    # Ordered (category, keywords) pairs -- order matters because the
    # first matching category is returned.
    keyword_table = (
        ('Groceries',
         ('carrefour', 'run market', 'intermarche')),
        ('Restaurants/Food',
         ('esko bar', 'le choka bleu', 'columbus cafe')),
        ('Online Services/Subscriptions',
         ('openrouter', 'stripe-z.ai', 'claude.ai', 'ama eu sarl prime_new',
          'scaleway', 'servperso* invoice pro')),
        ('Travel',
         ('air austral', 'run duty free', 'lm saint louis leroym4')),
        ('Shopping',
         ('mon brico', 'sumup*kulture metisse', 'sumup*glamport', 'relay')),
    )

    haystack = description.lower()
    for label, needles in keyword_table:
        for needle in needles:
            if needle in haystack:
                return label
    return 'Other'
|
|
|
|
# Matches the "<day> <three-word-chars>" prefix (e.g. "12 Jan") that the
# line heuristic uses to recognise a transaction line.  Compiled once here
# rather than being looked up again for every line of every statement.
_AMEX_DATE_PREFIX = re.compile(r'\d{1,2} \w{3}')


def _parse_amex_amount(token):
    """Convert an amount token from a statement line to a float.

    A lone comma is treated as the decimal mark (``'12,34'`` -> ``12.34``).
    When the token contains both ``','`` and ``'.'`` the right-most one is
    taken as the decimal mark and the other as a thousands separator, so
    ``'1.234,56'`` and ``'1,234.56'`` both yield ``1234.56``.

    Raises:
        ValueError: if the token is not a number after normalisation.
    """
    if ',' in token and '.' in token:
        if token.rfind(',') > token.rfind('.'):
            token = token.replace('.', '')
        else:
            token = token.replace(',', '')
    return float(token.replace(',', '.'))


def _parse_amex_line(line):
    """Return ``(date, description, amount)`` for a transaction line, else None.

    A line qualifies when it starts with a date prefix, does not end with
    ``'CR'``, has more than three whitespace-separated tokens, and its last
    token parses as an amount.
    """
    if not _AMEX_DATE_PREFIX.match(line) or line.endswith('CR'):
        return None

    parts = line.split()
    if len(parts) <= 3:
        return None

    try:
        amount = _parse_amex_amount(parts[-1])
    except ValueError:
        # The last token is not an amount, so the heuristic matched a
        # non-transaction line; skipping it is the intended best effort.
        return None

    return parts[0] + ' ' + parts[1], ' '.join(parts[2:-1]), amount


def process_amex_files(file_list, output_csv=False):
    """Extract transactions from statement PDFs and print a category summary.

    Each file is converted to text with the external ``pdftotext -layout``
    command.  Lines that look like transactions are parsed, categorised with
    ``categorize_amex_transaction`` and accumulated per category.  A summary
    is printed to stdout.

    Args:
        file_list: Iterable of paths to PDF files.
        output_csv: When true and at least one transaction was found, write
            all transactions to ``american_express_all_transactions.csv`` in
            the current working directory.

    Returns:
        A list of dicts with the keys ``Date``, ``Description``,
        ``Category``, ``Amount`` and ``Source`` (the PDF's base name), in
        the order the transactions were encountered.

    Files for which ``pdftotext`` fails, or when the command itself cannot
    be found, are reported on stdout and skipped.
    """
    expense_summary = defaultdict(float)
    total_expenses = 0.0
    all_transactions = []

    for file_path in file_list:
        try:
            result = subprocess.run(
                ['pdftotext', '-layout', file_path, '-'],
                capture_output=True, text=True, check=True,
            )
        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            print(f"Error processing {file_path}: {e}")
            continue

        source_name = os.path.basename(file_path)
        for line in result.stdout.split('\n'):
            parsed = _parse_amex_line(line)
            if parsed is None:
                continue
            date, description, amount = parsed

            category = categorize_amex_transaction(description)
            expense_summary[category] += amount
            total_expenses += amount

            # Store transaction for CSV output
            all_transactions.append({
                'Date': date,
                'Description': description,
                'Category': category,
                'Amount': amount,
                'Source': source_name,
            })

    # Output CSV if requested
    if output_csv and all_transactions:
        csv_file = 'american_express_all_transactions.csv'
        with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['Date', 'Description', 'Category', 'Amount', 'Source']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_transactions)
        print(f"\nTransaction data saved to {csv_file}")

    print("--- American Express Expense Summary for 2025 ---")
    print(f"Total Expenses Analyzed: €{total_expenses:,.2f}")
    print("\n--- Spending by Category ---")

    # Largest category first.
    sorted_expenses = sorted(expense_summary.items(), key=lambda item: item[1], reverse=True)

    if total_expenses > 0:
        for category, total in sorted_expenses:
            percentage = (total / total_expenses) * 100
            print(f"{category:<25} €{total:9,.2f} ({percentage:5.2f}%)")
    else:
        print("No expenses found.")

    return all_transactions
|
|
|
|
if __name__ == "__main__":
    import argparse
    import glob

    # Command-line interface: where the statement PDFs live and whether a
    # CSV export is wanted.
    cli = argparse.ArgumentParser(description='Process American Express statements')
    cli.add_argument(
        '--pdf-dir',
        default='american.express',
        help='Directory containing American Express PDF files',
    )
    cli.add_argument(
        '--csv',
        action='store_true',
        help='Output transaction data to CSV files',
    )
    options = cli.parse_args()

    # Collect every PDF in the directory.  The sort is a plain path sort,
    # so it is chronological only if the file names happen to sort by date.
    statement_paths = glob.glob(os.path.join(options.pdf_dir, "*.pdf"))
    statement_paths.sort()

    process_amex_files(statement_paths, options.csv)
|