Reorganize project structure with separate directories for scripts, data, and output

2026-02-09 10:20:55 +01:00
parent acb1276b38
commit 73ff2b70f7
125 changed files with 1786 additions and 56 deletions
--- a/scripts/process_expenses.py
+++ b/scripts/process_expenses.py
@@ -0,0 +1,171 @@
+
+import csv
+import glob
+import os
+from collections import defaultdict
+
+def categorize_transaction(description):
+    description = description.lower()
+    
+    if "pocket withdrawal" in description:
+        return "Ignore"
+
+    # Savings
+    if "to pocket eur épargne" in description:
+        return "Savings (Revolut Pocket)"
+
+    # Groceries
+    grocery_keywords = ['intermarché', 'carrefour', 'lidl', 'auchan', 'monoprix', 'e.leclerc', 'vival', 'super u', 'naturalia']
+    if any(keyword in description for keyword in grocery_keywords):
+        return "Groceries"
+
+    # Transport
+    transport_keywords = ['ratp', 'sncf', 'rhônexpress', 'tam', 'spl', 'transavia']
+    if any(keyword in description for keyword in transport_keywords):
+        return "Transport"
+
+    # Travel
+    travel_keywords = ['hotel', 'ote inn', 'fred 2 cow']
+    if any(keyword in description for keyword in travel_keywords):
+        return "Travel"
+
+    # Restaurants / Food
+    food_keywords = [
+        "mcdonald's", "starbucks", "kfc", "o'tacos", "domino's", 'burger', 'sushi', 
+        'pizza', 'restaurant', 'café', 'bar', 'boulangerie', 'patisserie', 
+        'columbus café', 'la rotisserie', 'le petit marcel', 'maison besnier',
+        'pokawa', 'liban sibon', 'le paradis du fruit', 'la station', 'amorino'
+    ]
+    if any(keyword in description for keyword in food_keywords):
+        return "Restaurants/Food"
+
+    # Shopping
+    shopping_keywords = ['amazon', 'fnac', 'decathlon', 'intersport', 'celio', 'jd paris velize 2', 'normal', 'rituals', 'bricorama']
+    if any(keyword in description for keyword in shopping_keywords):
+        return "Shopping"
+
+    # Entertainment / Leisure
+    entertainment_keywords = [
+        'ugc ciné', 'viva technology', 'pathe', 'gaumont', 'disneyland', 'parc asterix', 
+        'spotify', 'netflix', 'bizouk', 'club', 'bar', 'yoyo', 'le loft metropolis', 'western union'
+    ]
+    if any(keyword in description for keyword in entertainment_keywords):
+        return "Entertainment/Leisure"
+
+    # Utilities / Bills
+    utilities_keywords = ['mint', 'air medias', 'basic-fit', 'crossfit louvre']
+    if any(keyword in description for keyword in utilities_keywords):
+        return "Utilities/Bills"
+
+    # Health
+    health_keywords = ['pharmacie', 'doctolib']
+    if any(keyword in description for keyword in health_keywords):
+        return "Health"
+
+    # Cash Withdrawals
+    if 'cash withdrawal' in description:
+        return "Cash Withdrawals"
+
+    # Services
+    services_keywords = ['barber', 'coiffeur', 'sensei barber', 'rnf lavage aut']
+    if any(keyword in description for keyword in services_keywords):
+        return "Services"
+        
+    # Transfers (Outgoing)
+    if description.startswith('to ') or 'cynthia sophie carvigant' in description:
+        return 'Transfers Out'
+
+    # Crypto / Investing
+    if 'digital assets' in description or 'investment account' in description:
+        return 'Investing/Crypto'
+        
+    return "Other"
+
+def process_revolut_data(output_csv=False, output_dir='../../output/csv'):
+    expense_summary = defaultdict(float)
+    total_expenses = 0
+    other_descriptions = []
+    all_transactions = []
+
+    # We'll analyze the full year of 2025
+    csv_files = glob.glob('/home/acid/Downloads/comptabilite/revolut/account-statement_2025-*.csv')
+
+    for file in csv_files:
+        with open(file, 'r', encoding='utf-8') as f:
+            reader = csv.DictReader(f)
+            for row in reader:
+                try:
+                    amount = float(row['Amount'])
+                    currency = row['Currency']
+
+                    if currency == 'EUR' and amount < 0:
+                        description = row['Description']
+                        category = categorize_transaction(description)
+
+                        if category == "Ignore":
+                            continue
+
+                        # We only add the expense amount (absolute value)
+                        expense_summary[category] += abs(amount)
+                        total_expenses += abs(amount)
+                        
+                        # Store transaction for CSV output
+                        all_transactions.append({
+                            'Date': row['Completed Date'],
+                            'Description': description,
+                            'Category': category,
+                            'Amount': abs(amount),
+                            'Source': os.path.basename(file)
+                        })
+                        
+                        if category == "Other":
+                            other_descriptions.append((description, abs(amount)))
+
+                except (ValueError, KeyError):
+                    # Ignore rows with invalid amount or missing columns
+                    continue
+    
+    # Output CSV if requested
+    if output_csv and all_transactions:
+        csv_file = os.path.join(output_dir, 'revolut_all_transactions.csv')
+        os.makedirs(output_dir, exist_ok=True)
+        with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
+            fieldnames = ['Date', 'Description', 'Category', 'Amount', 'Source']
+            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+            writer.writeheader()
+            writer.writerows(all_transactions)
+        print(f"\nTransaction data saved to {csv_file}")
+
+    # Print Summary
+    print("--- Revolut Expense Summary for 2025 (Corrected) ---")
+    print(f"Total Expenses Analyzed: €{total_expenses:,.2f}")
+    print("\n--- Spending by Category ---")
+
+    # Sort categories by amount for better readability
+    sorted_expenses = sorted(expense_summary.items(), key=lambda item: item[1], reverse=True)
+
+    for category, total in sorted_expenses:
+        percentage = (total / total_expenses) * 100
+        print(f"{category:<25} €{total:9,.2f} ({percentage:5.2f}%)")
+        
+    print("\n--- Top 20 Uncategorized ('Other') Transactions (Post-Correction) ---")
+    other_descriptions.sort(key=lambda x: x[1], reverse=True)
+    for desc, amount in other_descriptions[:20]:
+        print(f"€{amount:8.2f} - {desc}")
+    
+    return all_transactions
+
+if __name__ == "__main__":
+    import argparse
+    
+    parser = argparse.ArgumentParser(description='Process Revolut statements')
+    parser.add_argument('--csv-dir', default='../data/raw_csv', 
+                       help='Directory containing Revolut CSV files')
+    parser.add_argument('--output-dir', default='../../output/csv', 
+                       help='Directory to save CSV output files')
+    parser.add_argument('--csv', action='store_true', 
+                       help='Output transaction data to CSV file')
+    args = parser.parse_args()
+    
+    # Process all CSV files in the directory
+    process_revolut_data(args.csv, args.output_dir)