#!/usr/bin/env python3
"""
Script to aggregate all account statements by month or year
"""

import argparse
import calendar
import csv
import os
import re
import sys
import unicodedata
from collections import defaultdict
from datetime import datetime

# Column order shared by every per-period transaction listing.
TRANSACTION_FIELDS = ['Date', 'Description', 'Category', 'Amount', 'Institution', 'Source']

# Accent-free French month names, in calendar order (index 0 == January).
FRENCH_MONTHS = (
    'janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin',
    'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre',
)

# (filename keywords, institution name); the first entry with a matching
# keyword wins, so the order of this table is significant.
INSTITUTION_KEYWORDS = (
    (('boursobank', 'releve-compte'), 'Boursobank'),
    (('american_express', 'amex'), 'American Express'),
    (('monabanq', 'extrait de comptes'), 'Monabanq'),
    (('revolut',), 'Revolut'),
    (('sncf', 'salaire'), 'SNCF'),
    (('la_poste', '2-la.poste', 'releve_ccp'), 'La Poste'),
)


def _strip_accents(text):
    """Return *text* with combining accents removed (e.g. 'février' -> 'fevrier')."""
    decomposed = unicodedata.normalize('NFKD', text)
    return ''.join(ch for ch in decomposed if not unicodedata.combining(ch))


def parse_date(date_str, source_file, month_first=None):
    """
    Parse date from various formats and return normalized (year, month, day)

    Args:
        date_str: Raw date text; surrounding whitespace is ignored.
        source_file: Name of the file the row came from. Used to pick the
            slash-date order and, for 'salaire' files, to recover the month
            and year from the filename.
        month_first: True to try MM/DD/YYYY before DD/MM/YYYY, False for the
            opposite. When None (default) month-first is used only if
            source_file is categorized as American Express.

    Returns:
        A (year, month, day) tuple. When nothing can be parsed, the first day
        of the current month is returned.
    """
    date_str = (date_str or '').strip()
    source_file = source_file or ''

    if month_first is None:
        month_first = categorize_institution(source_file) == 'American Express'

    # An ambiguous slash date such as 03/04/2023 parses under both layouts,
    # so the preferred layout has to be tried first.
    if month_first:
        formats = ['%m/%d/%Y', '%d/%m/%Y', '%Y-%m-%d']
    else:
        formats = ['%d/%m/%Y', '%m/%d/%Y', '%Y-%m-%d']

    for fmt in formats:
        try:
            dt = datetime.strptime(date_str, fmt)
        except ValueError:
            continue
        return (dt.year, dt.month, dt.day)

    # Try to extract from filename (for SNCF)
    hint = _strip_accents(source_file).lower()
    if 'salaire' in hint:
        for number, name in enumerate(FRENCH_MONTHS, 1):
            if name in hint:
                # Capture all four digits so 2023 stays 2023 (not 23).
                year_match = re.search(r'20\d{2}', source_file)
                year = int(year_match.group(0)) if year_match else datetime.now().year
                return (year, number, 1)

    # Default: first day of the current month
    now = datetime.now()
    return (now.year, now.month, 1)


def categorize_institution(source_file):
    """
    Determine the institution based on the source filename

    Matching is a case-insensitive substring test against
    INSTITUTION_KEYWORDS; 'Other' is returned when no keyword matches.
    """
    source_lower = source_file.lower()
    for keywords, institution in INSTITUTION_KEYWORDS:
        if any(keyword in source_lower for keyword in keywords):
            return institution
    return 'Other'


def _parse_amount(raw):
    """
    Convert an amount string to float.

    Accepts a decimal comma or decimal point, and drops spaces (including
    non-breaking ones) used as thousands separators. When both ',' and '.'
    occur, the right-most one is taken as the decimal separator.

    Returns 0.0 for an empty value and None when the text is not a number.
    """
    if not raw:
        return 0.0
    text = raw.strip()
    for space in (' ', '\u00a0', '\u202f'):
        text = text.replace(space, '')
    if ',' in text and '.' in text:
        if text.rfind(',') > text.rfind('.'):
            text = text.replace('.', '').replace(',', '.')  # 1.234,56
        else:
            text = text.replace(',', '')  # 1,234.56
    else:
        text = text.replace(',', '.')
    try:
        return float(text)
    except ValueError:
        return None


def process_csv_file(file_path):
    """
    Process a CSV file and return a list of transactions

    Rows without a 'Date' value are skipped. Each returned dict has the keys
    year, month, day, date_str, description, category, amount, institution
    and source. The institution is derived from the CSV file name.
    """
    transactions = []
    file_name = os.path.basename(file_path)
    institution = categorize_institution(file_name)
    # Force month-first parsing for Amex exports; otherwise let parse_date
    # decide from the row's own source name.
    month_first = True if institution == 'American Express' else None

    # utf-8-sig transparently drops a BOM, which would otherwise turn the
    # first header into '\ufeffDate' and cause every row to be skipped.
    with open(file_path, 'r', encoding='utf-8-sig', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            # Get the date
            date_str = (row.get('Date') or '').strip()
            if not date_str:
                continue

            # Fall back to the CSV name when the row carries no source.
            source = row.get('Source') or file_name

            # Parse and normalize the date
            year, month, day = parse_date(date_str, source, month_first=month_first)

            # Get amount (handle different column names)
            # NOTE(review): Debit and Credit values are taken with whatever
            # sign the file gives them — confirm against the exporters.
            amount_str = row.get('Amount', '') or row.get('Debit', '') or row.get('Credit', '0')
            amount = _parse_amount(amount_str)
            if amount is None:
                print(
                    f"  Warning: unparseable amount {amount_str!r} in {file_name} "
                    f"(line {reader.line_num}); using 0",
                    file=sys.stderr,
                )
                amount = 0

            # Create transaction record; blank text fields are normalized so
            # later sorting never compares None with str.
            transactions.append({
                'year': year,
                'month': month,
                'day': day,
                'date_str': date_str,
                'description': row.get('Description') or '',
                'category': row.get('Category') or 'Other',
                'amount': amount,
                'institution': institution,
                'source': source,
            })

    return transactions


def _sum_by_sign(transactions):
    """Return (income, expenses): the sums of negative and of positive amounts."""
    # Negative amounts are income in Revolut
    income = sum(t['amount'] for t in transactions if t['amount'] < 0)
    expenses = sum(t['amount'] for t in transactions if t['amount'] > 0)
    return income, expenses


def _write_transactions_csv(path, transactions, sort_key):
    """Write *transactions*, ordered by *sort_key*, as a detail CSV at *path*."""
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=TRANSACTION_FIELDS)
        writer.writeheader()
        for transaction in sorted(transactions, key=sort_key):
            writer.writerow({
                'Date': transaction['date_str'],
                'Description': transaction['description'],
                'Category': transaction['category'],
                'Amount': transaction['amount'],
                'Institution': transaction['institution'],
                'Source': transaction['source'],
            })


def _write_monthly_summary(path, monthly_transactions):
    """Write one totals row per (year, month) key of *monthly_transactions*."""
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow([
            'Year', 'Month', 'Total Income', 'Total Expenses', 'Net Balance',
            'Transaction Count', 'Institutions'
        ])
        for year, month in sorted(monthly_transactions):
            transactions = monthly_transactions[(year, month)]
            income, expenses = _sum_by_sign(transactions)
            institutions = sorted({t['institution'] for t in transactions})
            # Round on output only, to hide binary floating-point noise.
            writer.writerow([
                year,
                calendar.month_name[month],
                round(income, 2),
                round(expenses, 2),
                round(income + expenses, 2),
                len(transactions),
                ', '.join(institutions),
            ])


def _write_yearly_summary(path, yearly_transactions):
    """Write one totals row per year key of *yearly_transactions*."""
    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Year', 'Total Income', 'Total Expenses', 'Net Balance', 'Transaction Count'])
        for year in sorted(yearly_transactions):
            transactions = yearly_transactions[year]
            income, expenses = _sum_by_sign(transactions)
            writer.writerow([
                year,
                round(income, 2),
                round(expenses, 2),
                round(income + expenses, 2),
                len(transactions),
            ])


def _write_annual_category_report(path, year_transactions):
    """Write per-category count, total and share of the year's total to *path*."""
    categories = defaultdict(lambda: {'count': 0, 'total': 0})
    for transaction in year_transactions:
        entry = categories[transaction['category']]
        entry['count'] += 1
        entry['total'] += transaction['amount']

    year_total = sum(c['total'] for c in categories.values())

    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Category', 'Transaction Count', 'Total Amount', 'Percentage'])
        # Sort categories by total amount, largest first
        for category, data in sorted(categories.items(), key=lambda x: x[1]['total'], reverse=True):
            percentage = (data['total'] / year_total) * 100 if year_total != 0 else 0
            writer.writerow([category, data['count'], round(data['total'], 2), f"{percentage:.2f}%"])


def main(argv=None):
    """
    Command-line entry point.

    Reads every CSV in --input-dir, then writes monthly_summary.csv and
    yearly_summary.csv (always covering all years) plus either per-month
    transaction files or, with --annual, per-year category and transaction
    files. --year restricts the per-month / per-year files to that year.

    Args:
        argv: Argument list to parse; None (default) uses sys.argv[1:].
    """
    parser = argparse.ArgumentParser(description='Aggregate all account statements by month or year')
    parser.add_argument('--input-dir', default='output/csv',
                        help='Directory containing CSV files to aggregate (default: output/csv)')
    parser.add_argument('--output-dir', default='output/reports',
                        help='Directory to save aggregated reports (default: output/reports)')
    parser.add_argument('--annual', action='store_true',
                        help='Create annual reports instead of monthly reports')
    parser.add_argument('--year', type=int,
                        help='Generate reports for a specific year only')
    args = parser.parse_args(argv)

    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)

    report_type = "Annual" if args.annual else "Monthly"

    print(f"\n{'='*60}")
    print(f"{report_type} Aggregation of All Account Statements")
    print(f"Input Directory: {os.path.abspath(args.input_dir)}")
    print(f"Output Directory: {os.path.abspath(args.output_dir)}")
    if args.year:
        print(f"Year Filter: {args.year}")
    print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*60}")

    if not os.path.isdir(args.input_dir):
        print(f"\nError: Input directory not found: {args.input_dir}")
        return

    # Find all CSV files in input directory; sorted so that runs are
    # reproducible regardless of the filesystem's listing order.
    csv_files = sorted(f for f in os.listdir(args.input_dir) if f.lower().endswith('.csv'))

    if not csv_files:
        print(f"\nError: No CSV files found in {args.input_dir}")
        return

    # Collect all transactions
    all_transactions = []
    for csv_file in csv_files:
        file_path = os.path.join(args.input_dir, csv_file)
        print(f"\nProcessing: {csv_file}")

        transactions = process_csv_file(file_path)
        all_transactions.extend(transactions)
        print(f"  Found {len(transactions)} transactions")

    # Group transactions by month and by year
    monthly_transactions = defaultdict(list)
    yearly_transactions = defaultdict(list)
    for transaction in all_transactions:
        monthly_transactions[(transaction['year'], transaction['month'])].append(transaction)
        yearly_transactions[transaction['year']].append(transaction)

    # Summary files
    summary_file = os.path.join(args.output_dir, 'monthly_summary.csv')
    _write_monthly_summary(summary_file, monthly_transactions)

    yearly_file = os.path.join(args.output_dir, 'yearly_summary.csv')
    _write_yearly_summary(yearly_file, yearly_transactions)

    generated_files = [
        os.path.basename(summary_file),
        os.path.basename(yearly_file)
    ]

    if args.annual:
        # Create annual reports
        for year in sorted(yearly_transactions):
            if args.year and year != args.year:
                continue  # Skip years not matching filter

            print(f"\nCreating annual report for {year}...")
            year_transactions = yearly_transactions[year]

            annual_file = os.path.join(args.output_dir, f'annual_report_{year}.csv')
            _write_annual_category_report(annual_file, year_transactions)

            annual_transactions_file = os.path.join(args.output_dir, f'annual_transactions_{year}.csv')
            _write_transactions_csv(
                annual_transactions_file,
                year_transactions,
                sort_key=lambda x: (x['month'], x['day'], x['description']),
            )

            generated_files.append(os.path.basename(annual_file))
            generated_files.append(os.path.basename(annual_transactions_file))
            print(f"  Created {os.path.basename(annual_file)} and {os.path.basename(annual_transactions_file)}")
    else:
        # Create monthly reports
        for year, month in sorted(monthly_transactions):
            if args.year and year != args.year:
                continue  # Skip years not matching filter

            month_name = calendar.month_name[month].lower()
            detail_name = f'transactions_{year}_{month_name}.csv'
            _write_transactions_csv(
                os.path.join(args.output_dir, detail_name),
                monthly_transactions[(year, month)],
                sort_key=lambda x: (x['day'], x['description']),
            )
            generated_files.append(detail_name)

    # Print summary statistics
    print(f"\n{'='*60}")
    print(f"Aggregation Complete")
    print(f"Total Transactions: {len(all_transactions)}")
    print(f"Years with Data: {len(yearly_transactions)}")
    if not args.annual:
        print(f"Months with Data: {len(monthly_transactions)}")
    print(f"{'='*60}")

    # List generated files
    print("\nGenerated Files:")
    for file in generated_files:
        file_path = os.path.join(args.output_dir, file)
        if os.path.exists(file_path):
            file_size = os.path.getsize(file_path)
            print(f"  - {file} ({file_size:,} bytes)")


if __name__ == "__main__":
    main()