#!/usr/bin/env python3 import csv import glob import os from collections import defaultdict def categorize_transaction(description): description = description.lower() if "pocket withdrawal" in description: return "Ignore" # Savings if "to pocket eur épargne" in description: return "Savings (Revolut Pocket)" # Groceries grocery_keywords = ['intermarché', 'carrefour', 'lidl', 'auchan', 'monoprix', 'e.leclerc', 'vival', 'super u', 'naturalia'] if any(keyword in description for keyword in grocery_keywords): return "Groceries" # Transport transport_keywords = ['ratp', 'sncf', 'rhônexpress', 'tam', 'spl', 'transavia'] if any(keyword in description for keyword in transport_keywords): return "Transport" # Travel travel_keywords = ['hotel', 'ote inn', 'fred 2 cow'] if any(keyword in description for keyword in travel_keywords): return "Travel" # Restaurants / Food food_keywords = [ "mcdonald's", "starbucks", "kfc", "o'tacos", "domino's", 'burger', 'sushi', 'pizza', 'restaurant', 'café', 'bar', 'boulangerie', 'patisserie', 'columbus café', 'la rotisserie', 'le petit marcel', 'maison besnier', 'pokawa', 'liban sibon', 'le paradis du fruit', 'la station', 'amorino' ] if any(keyword in description for keyword in food_keywords): return "Restaurants/Food" # Shopping shopping_keywords = ['amazon', 'fnac', 'decathlon', 'intersport', 'celio', 'jd paris velize 2', 'normal', 'rituals', 'bricorama'] if any(keyword in description for keyword in shopping_keywords): return "Shopping" # Entertainment / Leisure entertainment_keywords = [ 'ugc ciné', 'viva technology', 'pathe', 'gaumont', 'disneyland', 'parc asterix', 'spotify', 'netflix', 'bizouk', 'club', 'bar', 'yoyo', 'le loft metropolis', 'western union' ] if any(keyword in description for keyword in entertainment_keywords): return "Entertainment/Leisure" # Utilities / Bills utilities_keywords = ['mint', 'air medias', 'basic-fit', 'crossfit louvre'] if any(keyword in description for keyword in utilities_keywords): return "Utilities/Bills" # Health health_keywords = ['pharmacie', 'doctolib'] if any(keyword in description for keyword in health_keywords): return "Health" # Cash Withdrawals if 'cash withdrawal' in description: return "Cash Withdrawals" # Services services_keywords = ['barber', 'coiffeur', 'sensei barber', 'rnf lavage aut'] if any(keyword in description for keyword in services_keywords): return "Services" # Transfers (Outgoing) if description.startswith('to ') or 'cynthia sophie carvigant' in description: return 'Transfers Out' # Crypto / Investing if 'digital assets' in description or 'investment account' in description: return 'Investing/Crypto' return "Other" def process_revolut_data(output_csv=False, output_dir='../../output/csv'): expense_summary = defaultdict(float) total_expenses = 0 other_descriptions = [] all_transactions = [] # We'll analyze the full year of 2025 csv_files = glob.glob('/home/acid/Downloads/comptabilite/revolut/account-statement_2025-*.csv') for file in csv_files: with open(file, 'r', encoding='utf-8') as f: reader = csv.DictReader(f) for row in reader: try: amount = float(row['Amount']) currency = row['Currency'] if currency == 'EUR' and amount < 0: description = row['Description'] category = categorize_transaction(description) if category == "Ignore": continue # We only add the expense amount (absolute value) expense_summary[category] += abs(amount) total_expenses += abs(amount) # Store transaction for CSV output all_transactions.append({ 'Date': row['Completed Date'], 'Description': description, 'Category': category, 'Amount': abs(amount), 'Source': os.path.basename(file) }) if category == "Other": other_descriptions.append((description, abs(amount))) except (ValueError, KeyError): # Ignore rows with invalid amount or missing columns continue # Output CSV if requested if output_csv and all_transactions: csv_file = os.path.join(output_dir, 'revolut_all_transactions.csv') os.makedirs(output_dir, exist_ok=True) with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile: fieldnames = ['Date', 'Description', 'Category', 'Amount', 'Source'] writer = csv.DictWriter(csvfile, fieldnames=fieldnames) writer.writeheader() writer.writerows(all_transactions) print(f"\nTransaction data saved to {csv_file}") # Print Summary print("--- Revolut Expense Summary for 2025 (Corrected) ---") print(f"Total Expenses Analyzed: €{total_expenses:,.2f}") print("\n--- Spending by Category ---") # Sort categories by amount for better readability sorted_expenses = sorted(expense_summary.items(), key=lambda item: item[1], reverse=True) for category, total in sorted_expenses: percentage = (total / total_expenses) * 100 print(f"{category:<25} €{total:9,.2f} ({percentage:5.2f}%)") print("\n--- Top 20 Uncategorized ('Other') Transactions (Post-Correction) ---") other_descriptions.sort(key=lambda x: x[1], reverse=True) for desc, amount in other_descriptions[:20]: print(f"€{amount:8.2f} - {desc}") return all_transactions if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description='Process Revolut statements') parser.add_argument('--csv-dir', default='../data/raw_csv', help='Directory containing Revolut CSV files') parser.add_argument('--output-dir', default='../../output/csv', help='Directory to save CSV output files') parser.add_argument('--csv', action='store_true', help='Output transaction data to CSV file') args = parser.parse_args() # Process all CSV files in the directory process_revolut_data(args.csv, args.output_dir)