Files
personnal-accounting/scripts/process_expenses.py
Kevin Bataille eb66c7a43e Refactor SNCF processor and add Revolut aggregator
- Fix SNCF NET PAYÉ EN EUROS extraction to correctly parse MENSUEL line
- Extract month/year from PDF content instead of filename
- Add new Revolut CSV processor to aggregate account statements
- Organize Revolut data files into data/csv/revolut/
- Clean up redundant scripts and reports
2026-02-09 16:17:48 +01:00

173 lines
6.7 KiB
Python
Executable File

#!/usr/bin/env python3
import csv
import glob
import os
from collections import defaultdict
def categorize_transaction(description):
description = description.lower()
if "pocket withdrawal" in description:
return "Ignore"
# Savings
if "to pocket eur épargne" in description:
return "Savings (Revolut Pocket)"
# Groceries
grocery_keywords = ['intermarché', 'carrefour', 'lidl', 'auchan', 'monoprix', 'e.leclerc', 'vival', 'super u', 'naturalia']
if any(keyword in description for keyword in grocery_keywords):
return "Groceries"
# Transport
transport_keywords = ['ratp', 'sncf', 'rhônexpress', 'tam', 'spl', 'transavia']
if any(keyword in description for keyword in transport_keywords):
return "Transport"
# Travel
travel_keywords = ['hotel', 'ote inn', 'fred 2 cow']
if any(keyword in description for keyword in travel_keywords):
return "Travel"
# Restaurants / Food
food_keywords = [
"mcdonald's", "starbucks", "kfc", "o'tacos", "domino's", 'burger', 'sushi',
'pizza', 'restaurant', 'café', 'bar', 'boulangerie', 'patisserie',
'columbus café', 'la rotisserie', 'le petit marcel', 'maison besnier',
'pokawa', 'liban sibon', 'le paradis du fruit', 'la station', 'amorino'
]
if any(keyword in description for keyword in food_keywords):
return "Restaurants/Food"
# Shopping
shopping_keywords = ['amazon', 'fnac', 'decathlon', 'intersport', 'celio', 'jd paris velize 2', 'normal', 'rituals', 'bricorama']
if any(keyword in description for keyword in shopping_keywords):
return "Shopping"
# Entertainment / Leisure
entertainment_keywords = [
'ugc ciné', 'viva technology', 'pathe', 'gaumont', 'disneyland', 'parc asterix',
'spotify', 'netflix', 'bizouk', 'club', 'bar', 'yoyo', 'le loft metropolis', 'western union'
]
if any(keyword in description for keyword in entertainment_keywords):
return "Entertainment/Leisure"
# Utilities / Bills
utilities_keywords = ['mint', 'air medias', 'basic-fit', 'crossfit louvre']
if any(keyword in description for keyword in utilities_keywords):
return "Utilities/Bills"
# Health
health_keywords = ['pharmacie', 'doctolib']
if any(keyword in description for keyword in health_keywords):
return "Health"
# Cash Withdrawals
if 'cash withdrawal' in description:
return "Cash Withdrawals"
# Services
services_keywords = ['barber', 'coiffeur', 'sensei barber', 'rnf lavage aut']
if any(keyword in description for keyword in services_keywords):
return "Services"
# Transfers (Outgoing)
if description.startswith('to ') or 'cynthia sophie carvigant' in description:
return 'Transfers Out'
# Crypto / Investing
if 'digital assets' in description or 'investment account' in description:
return 'Investing/Crypto'
return "Other"
def process_revolut_data(output_csv=False, output_dir='../../output/csv'):
expense_summary = defaultdict(float)
total_expenses = 0
other_descriptions = []
all_transactions = []
# We'll analyze the full year of 2025
csv_files = glob.glob('/home/acid/Downloads/comptabilite/revolut/account-statement_2025-*.csv')
for file in csv_files:
with open(file, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
try:
amount = float(row['Amount'])
currency = row['Currency']
if currency == 'EUR' and amount < 0:
description = row['Description']
category = categorize_transaction(description)
if category == "Ignore":
continue
# We only add the expense amount (absolute value)
expense_summary[category] += abs(amount)
total_expenses += abs(amount)
# Store transaction for CSV output
all_transactions.append({
'Date': row['Completed Date'],
'Description': description,
'Category': category,
'Amount': abs(amount),
'Source': os.path.basename(file)
})
if category == "Other":
other_descriptions.append((description, abs(amount)))
except (ValueError, KeyError):
# Ignore rows with invalid amount or missing columns
continue
# Output CSV if requested
if output_csv and all_transactions:
csv_file = os.path.join(output_dir, 'revolut_all_transactions.csv')
os.makedirs(output_dir, exist_ok=True)
with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
fieldnames = ['Date', 'Description', 'Category', 'Amount', 'Source']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
writer.writerows(all_transactions)
print(f"\nTransaction data saved to {csv_file}")
# Print Summary
print("--- Revolut Expense Summary for 2025 (Corrected) ---")
print(f"Total Expenses Analyzed: €{total_expenses:,.2f}")
print("\n--- Spending by Category ---")
# Sort categories by amount for better readability
sorted_expenses = sorted(expense_summary.items(), key=lambda item: item[1], reverse=True)
for category, total in sorted_expenses:
percentage = (total / total_expenses) * 100
print(f"{category:<25}{total:9,.2f} ({percentage:5.2f}%)")
print("\n--- Top 20 Uncategorized ('Other') Transactions (Post-Correction) ---")
other_descriptions.sort(key=lambda x: x[1], reverse=True)
for desc, amount in other_descriptions[:20]:
print(f"{amount:8.2f} - {desc}")
return all_transactions
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser(description='Process Revolut statements')
parser.add_argument('--csv-dir', default='../data/raw_csv',
help='Directory containing Revolut CSV files')
parser.add_argument('--output-dir', default='../../output/csv',
help='Directory to save CSV output files')
parser.add_argument('--csv', action='store_true',
help='Output transaction data to CSV file')
args = parser.parse_args()
# Process all CSV files in the directory
process_revolut_data(args.csv, args.output_dir)