- Fix SNCF NET PAYÉ EN EUROS extraction to correctly parse MENSUEL line - Extract month/year from PDF content instead of filename - Add new Revolut CSV processor to aggregate account statements - Organize Revolut data files into data/csv/revolut/ - Clean up redundant scripts and reports
173 lines
6.7 KiB
Python
Executable File
173 lines
6.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import csv
|
|
import glob
|
|
import os
|
|
from collections import defaultdict
|
|
|
|
def categorize_transaction(description):
|
|
description = description.lower()
|
|
|
|
if "pocket withdrawal" in description:
|
|
return "Ignore"
|
|
|
|
# Savings
|
|
if "to pocket eur épargne" in description:
|
|
return "Savings (Revolut Pocket)"
|
|
|
|
# Groceries
|
|
grocery_keywords = ['intermarché', 'carrefour', 'lidl', 'auchan', 'monoprix', 'e.leclerc', 'vival', 'super u', 'naturalia']
|
|
if any(keyword in description for keyword in grocery_keywords):
|
|
return "Groceries"
|
|
|
|
# Transport
|
|
transport_keywords = ['ratp', 'sncf', 'rhônexpress', 'tam', 'spl', 'transavia']
|
|
if any(keyword in description for keyword in transport_keywords):
|
|
return "Transport"
|
|
|
|
# Travel
|
|
travel_keywords = ['hotel', 'ote inn', 'fred 2 cow']
|
|
if any(keyword in description for keyword in travel_keywords):
|
|
return "Travel"
|
|
|
|
# Restaurants / Food
|
|
food_keywords = [
|
|
"mcdonald's", "starbucks", "kfc", "o'tacos", "domino's", 'burger', 'sushi',
|
|
'pizza', 'restaurant', 'café', 'bar', 'boulangerie', 'patisserie',
|
|
'columbus café', 'la rotisserie', 'le petit marcel', 'maison besnier',
|
|
'pokawa', 'liban sibon', 'le paradis du fruit', 'la station', 'amorino'
|
|
]
|
|
if any(keyword in description for keyword in food_keywords):
|
|
return "Restaurants/Food"
|
|
|
|
# Shopping
|
|
shopping_keywords = ['amazon', 'fnac', 'decathlon', 'intersport', 'celio', 'jd paris velize 2', 'normal', 'rituals', 'bricorama']
|
|
if any(keyword in description for keyword in shopping_keywords):
|
|
return "Shopping"
|
|
|
|
# Entertainment / Leisure
|
|
entertainment_keywords = [
|
|
'ugc ciné', 'viva technology', 'pathe', 'gaumont', 'disneyland', 'parc asterix',
|
|
'spotify', 'netflix', 'bizouk', 'club', 'bar', 'yoyo', 'le loft metropolis', 'western union'
|
|
]
|
|
if any(keyword in description for keyword in entertainment_keywords):
|
|
return "Entertainment/Leisure"
|
|
|
|
# Utilities / Bills
|
|
utilities_keywords = ['mint', 'air medias', 'basic-fit', 'crossfit louvre']
|
|
if any(keyword in description for keyword in utilities_keywords):
|
|
return "Utilities/Bills"
|
|
|
|
# Health
|
|
health_keywords = ['pharmacie', 'doctolib']
|
|
if any(keyword in description for keyword in health_keywords):
|
|
return "Health"
|
|
|
|
# Cash Withdrawals
|
|
if 'cash withdrawal' in description:
|
|
return "Cash Withdrawals"
|
|
|
|
# Services
|
|
services_keywords = ['barber', 'coiffeur', 'sensei barber', 'rnf lavage aut']
|
|
if any(keyword in description for keyword in services_keywords):
|
|
return "Services"
|
|
|
|
# Transfers (Outgoing)
|
|
if description.startswith('to ') or 'cynthia sophie carvigant' in description:
|
|
return 'Transfers Out'
|
|
|
|
# Crypto / Investing
|
|
if 'digital assets' in description or 'investment account' in description:
|
|
return 'Investing/Crypto'
|
|
|
|
return "Other"
|
|
|
|
def process_revolut_data(output_csv=False, output_dir='../../output/csv'):
|
|
expense_summary = defaultdict(float)
|
|
total_expenses = 0
|
|
other_descriptions = []
|
|
all_transactions = []
|
|
|
|
# We'll analyze the full year of 2025
|
|
csv_files = glob.glob('/home/acid/Downloads/comptabilite/revolut/account-statement_2025-*.csv')
|
|
|
|
for file in csv_files:
|
|
with open(file, 'r', encoding='utf-8') as f:
|
|
reader = csv.DictReader(f)
|
|
for row in reader:
|
|
try:
|
|
amount = float(row['Amount'])
|
|
currency = row['Currency']
|
|
|
|
if currency == 'EUR' and amount < 0:
|
|
description = row['Description']
|
|
category = categorize_transaction(description)
|
|
|
|
if category == "Ignore":
|
|
continue
|
|
|
|
# We only add the expense amount (absolute value)
|
|
expense_summary[category] += abs(amount)
|
|
total_expenses += abs(amount)
|
|
|
|
# Store transaction for CSV output
|
|
all_transactions.append({
|
|
'Date': row['Completed Date'],
|
|
'Description': description,
|
|
'Category': category,
|
|
'Amount': abs(amount),
|
|
'Source': os.path.basename(file)
|
|
})
|
|
|
|
if category == "Other":
|
|
other_descriptions.append((description, abs(amount)))
|
|
|
|
except (ValueError, KeyError):
|
|
# Ignore rows with invalid amount or missing columns
|
|
continue
|
|
|
|
# Output CSV if requested
|
|
if output_csv and all_transactions:
|
|
csv_file = os.path.join(output_dir, 'revolut_all_transactions.csv')
|
|
os.makedirs(output_dir, exist_ok=True)
|
|
with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
|
|
fieldnames = ['Date', 'Description', 'Category', 'Amount', 'Source']
|
|
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
|
writer.writeheader()
|
|
writer.writerows(all_transactions)
|
|
print(f"\nTransaction data saved to {csv_file}")
|
|
|
|
# Print Summary
|
|
print("--- Revolut Expense Summary for 2025 (Corrected) ---")
|
|
print(f"Total Expenses Analyzed: €{total_expenses:,.2f}")
|
|
print("\n--- Spending by Category ---")
|
|
|
|
# Sort categories by amount for better readability
|
|
sorted_expenses = sorted(expense_summary.items(), key=lambda item: item[1], reverse=True)
|
|
|
|
for category, total in sorted_expenses:
|
|
percentage = (total / total_expenses) * 100
|
|
print(f"{category:<25} €{total:9,.2f} ({percentage:5.2f}%)")
|
|
|
|
print("\n--- Top 20 Uncategorized ('Other') Transactions (Post-Correction) ---")
|
|
other_descriptions.sort(key=lambda x: x[1], reverse=True)
|
|
for desc, amount in other_descriptions[:20]:
|
|
print(f"€{amount:8.2f} - {desc}")
|
|
|
|
return all_transactions
|
|
|
|
if __name__ == "__main__":
|
|
import argparse
|
|
|
|
parser = argparse.ArgumentParser(description='Process Revolut statements')
|
|
parser.add_argument('--csv-dir', default='../data/raw_csv',
|
|
help='Directory containing Revolut CSV files')
|
|
parser.add_argument('--output-dir', default='../../output/csv',
|
|
help='Directory to save CSV output files')
|
|
parser.add_argument('--csv', action='store_true',
|
|
help='Output transaction data to CSV file')
|
|
args = parser.parse_args()
|
|
|
|
# Process all CSV files in the directory
|
|
process_revolut_data(args.csv, args.output_dir)
|