Reorganize project structure with separate directories for scripts, data, and output
This commit is contained in:
171
scripts/process_expenses.py
Normal file
171
scripts/process_expenses.py
Normal file
@@ -0,0 +1,171 @@
|
||||
|
||||
import csv
|
||||
import glob
|
||||
import os
|
||||
from collections import defaultdict
|
||||
|
||||
def categorize_transaction(description):
|
||||
description = description.lower()
|
||||
|
||||
if "pocket withdrawal" in description:
|
||||
return "Ignore"
|
||||
|
||||
# Savings
|
||||
if "to pocket eur épargne" in description:
|
||||
return "Savings (Revolut Pocket)"
|
||||
|
||||
# Groceries
|
||||
grocery_keywords = ['intermarché', 'carrefour', 'lidl', 'auchan', 'monoprix', 'e.leclerc', 'vival', 'super u', 'naturalia']
|
||||
if any(keyword in description for keyword in grocery_keywords):
|
||||
return "Groceries"
|
||||
|
||||
# Transport
|
||||
transport_keywords = ['ratp', 'sncf', 'rhônexpress', 'tam', 'spl', 'transavia']
|
||||
if any(keyword in description for keyword in transport_keywords):
|
||||
return "Transport"
|
||||
|
||||
# Travel
|
||||
travel_keywords = ['hotel', 'ote inn', 'fred 2 cow']
|
||||
if any(keyword in description for keyword in travel_keywords):
|
||||
return "Travel"
|
||||
|
||||
# Restaurants / Food
|
||||
food_keywords = [
|
||||
"mcdonald's", "starbucks", "kfc", "o'tacos", "domino's", 'burger', 'sushi',
|
||||
'pizza', 'restaurant', 'café', 'bar', 'boulangerie', 'patisserie',
|
||||
'columbus café', 'la rotisserie', 'le petit marcel', 'maison besnier',
|
||||
'pokawa', 'liban sibon', 'le paradis du fruit', 'la station', 'amorino'
|
||||
]
|
||||
if any(keyword in description for keyword in food_keywords):
|
||||
return "Restaurants/Food"
|
||||
|
||||
# Shopping
|
||||
shopping_keywords = ['amazon', 'fnac', 'decathlon', 'intersport', 'celio', 'jd paris velize 2', 'normal', 'rituals', 'bricorama']
|
||||
if any(keyword in description for keyword in shopping_keywords):
|
||||
return "Shopping"
|
||||
|
||||
# Entertainment / Leisure
|
||||
entertainment_keywords = [
|
||||
'ugc ciné', 'viva technology', 'pathe', 'gaumont', 'disneyland', 'parc asterix',
|
||||
'spotify', 'netflix', 'bizouk', 'club', 'bar', 'yoyo', 'le loft metropolis', 'western union'
|
||||
]
|
||||
if any(keyword in description for keyword in entertainment_keywords):
|
||||
return "Entertainment/Leisure"
|
||||
|
||||
# Utilities / Bills
|
||||
utilities_keywords = ['mint', 'air medias', 'basic-fit', 'crossfit louvre']
|
||||
if any(keyword in description for keyword in utilities_keywords):
|
||||
return "Utilities/Bills"
|
||||
|
||||
# Health
|
||||
health_keywords = ['pharmacie', 'doctolib']
|
||||
if any(keyword in description for keyword in health_keywords):
|
||||
return "Health"
|
||||
|
||||
# Cash Withdrawals
|
||||
if 'cash withdrawal' in description:
|
||||
return "Cash Withdrawals"
|
||||
|
||||
# Services
|
||||
services_keywords = ['barber', 'coiffeur', 'sensei barber', 'rnf lavage aut']
|
||||
if any(keyword in description for keyword in services_keywords):
|
||||
return "Services"
|
||||
|
||||
# Transfers (Outgoing)
|
||||
if description.startswith('to ') or 'cynthia sophie carvigant' in description:
|
||||
return 'Transfers Out'
|
||||
|
||||
# Crypto / Investing
|
||||
if 'digital assets' in description or 'investment account' in description:
|
||||
return 'Investing/Crypto'
|
||||
|
||||
return "Other"
|
||||
|
||||
def process_revolut_data(output_csv=False, output_dir='../../output/csv'):
|
||||
expense_summary = defaultdict(float)
|
||||
total_expenses = 0
|
||||
other_descriptions = []
|
||||
all_transactions = []
|
||||
|
||||
# We'll analyze the full year of 2025
|
||||
csv_files = glob.glob('/home/acid/Downloads/comptabilite/revolut/account-statement_2025-*.csv')
|
||||
|
||||
for file in csv_files:
|
||||
with open(file, 'r', encoding='utf-8') as f:
|
||||
reader = csv.DictReader(f)
|
||||
for row in reader:
|
||||
try:
|
||||
amount = float(row['Amount'])
|
||||
currency = row['Currency']
|
||||
|
||||
if currency == 'EUR' and amount < 0:
|
||||
description = row['Description']
|
||||
category = categorize_transaction(description)
|
||||
|
||||
if category == "Ignore":
|
||||
continue
|
||||
|
||||
# We only add the expense amount (absolute value)
|
||||
expense_summary[category] += abs(amount)
|
||||
total_expenses += abs(amount)
|
||||
|
||||
# Store transaction for CSV output
|
||||
all_transactions.append({
|
||||
'Date': row['Completed Date'],
|
||||
'Description': description,
|
||||
'Category': category,
|
||||
'Amount': abs(amount),
|
||||
'Source': os.path.basename(file)
|
||||
})
|
||||
|
||||
if category == "Other":
|
||||
other_descriptions.append((description, abs(amount)))
|
||||
|
||||
except (ValueError, KeyError):
|
||||
# Ignore rows with invalid amount or missing columns
|
||||
continue
|
||||
|
||||
# Output CSV if requested
|
||||
if output_csv and all_transactions:
|
||||
csv_file = os.path.join(output_dir, 'revolut_all_transactions.csv')
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
|
||||
fieldnames = ['Date', 'Description', 'Category', 'Amount', 'Source']
|
||||
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
writer.writerows(all_transactions)
|
||||
print(f"\nTransaction data saved to {csv_file}")
|
||||
|
||||
# Print Summary
|
||||
print("--- Revolut Expense Summary for 2025 (Corrected) ---")
|
||||
print(f"Total Expenses Analyzed: €{total_expenses:,.2f}")
|
||||
print("\n--- Spending by Category ---")
|
||||
|
||||
# Sort categories by amount for better readability
|
||||
sorted_expenses = sorted(expense_summary.items(), key=lambda item: item[1], reverse=True)
|
||||
|
||||
for category, total in sorted_expenses:
|
||||
percentage = (total / total_expenses) * 100
|
||||
print(f"{category:<25} €{total:9,.2f} ({percentage:5.2f}%)")
|
||||
|
||||
print("\n--- Top 20 Uncategorized ('Other') Transactions (Post-Correction) ---")
|
||||
other_descriptions.sort(key=lambda x: x[1], reverse=True)
|
||||
for desc, amount in other_descriptions[:20]:
|
||||
print(f"€{amount:8.2f} - {desc}")
|
||||
|
||||
return all_transactions
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser(description='Process Revolut statements')
|
||||
parser.add_argument('--csv-dir', default='../data/raw_csv',
|
||||
help='Directory containing Revolut CSV files')
|
||||
parser.add_argument('--output-dir', default='../../output/csv',
|
||||
help='Directory to save CSV output files')
|
||||
parser.add_argument('--csv', action='store_true',
|
||||
help='Output transaction data to CSV file')
|
||||
args = parser.parse_args()
|
||||
|
||||
# Process all CSV files in the directory
|
||||
process_revolut_data(args.csv, args.output_dir)
|
||||
Reference in New Issue
Block a user