#!/usr/bin/env python3
"""
Script to aggregate all account statements by month
"""

import os
import csv
import sys
import argparse
import re
from datetime import datetime
from collections import defaultdict
import calendar

def parse_date(date_str, source_file):
    """
    Parse a date string in one of the supported formats into (year, month, day).

    Args:
        date_str: Raw date text, e.g. '31/01/2024', '01/31/2024' or
            '2024-01-31'. Surrounding whitespace is ignored.
        source_file: Name of the statement the row came from. It selects the
            order in which slash-separated formats are tried and, for salary
            statements, serves as a fallback source for the month and year.

    Returns:
        A (year, month, day) tuple of ints. When the date cannot be parsed:
        for a source containing 'salaire', the month (and year, if a 20xx
        number is present) is read from the source name with day 1; otherwise
        the first day of the current month is returned.
    """
    date_str = (date_str or '').strip()
    source_lower = (source_file or '').lower()

    # DD/MM/YYYY and MM/DD/YYYY cannot be told apart when the day is <= 12,
    # so the first format tried wins. Amex statements are month-first; every
    # other source is treated as day-first.
    if 'amex' in source_lower or 'american_express' in source_lower:
        formats = ['%m/%d/%Y', '%d/%m/%Y', '%Y-%m-%d']
    else:
        formats = ['%d/%m/%Y', '%m/%d/%Y', '%Y-%m-%d']

    for fmt in formats:
        try:
            dt = datetime.strptime(date_str, fmt)
        except ValueError:
            continue
        return (dt.year, dt.month, dt.day)

    # Take a single snapshot so year and month cannot straddle a month change.
    today = datetime.now()

    # Try to extract the period from the filename (salary statements).
    if 'salaire' in source_lower:
        months = ['janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin',
                  'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre']
        for month_number, month_name in enumerate(months, 1):
            if month_name in source_lower:
                # Match the full four-digit year (e.g. 2023), not just its
                # last two digits.
                year_match = re.search(r'20\d{2}', source_lower)
                year = int(year_match.group(0)) if year_match else today.year
                return (year, month_number, 1)

    # Default: first day of the current month.
    return (today.year, today.month, 1)

def categorize_institution(source_file):
    """
    Map a statement filename to the name of the institution it belongs to.

    The filename is compared case-insensitively against a list of known
    markers; rules are evaluated in order and the first match wins.
    Returns 'Other' when no marker is found.
    """
    name = source_file.lower()

    # Ordered (markers, label) rules; order defines precedence.
    rules = (
        (('boursobank', 'releve-compte'), 'Boursobank'),
        (('american_express', 'amex'), 'American Express'),
        (('monabanq', 'extrait de comptes'), 'Monabanq'),
        (('revolut',), 'Revolut'),
        (('sncf', 'salaire'), 'SNCF'),
        (('la_poste', '2-la.poste', 'releve_ccp'), 'La Poste'),
    )

    for markers, label in rules:
        if any(marker in name for marker in markers):
            return label

    return 'Other'

def _parse_amount(amount_str):
    """
    Convert a raw amount cell to a float; return 0.0 when it cannot be parsed.

    Accepts a decimal comma or a decimal point, ignores whitespace used as a
    thousands separator (including non-breaking spaces) and a leading or
    trailing currency symbol. When both ',' and '.' are present, the
    right-most one is taken as the decimal mark.
    """
    if not amount_str:
        return 0.0

    # str.split() with no argument splits on every Unicode whitespace
    # character, so this also removes non-breaking thousands separators.
    cleaned = ''.join(amount_str.split())
    cleaned = cleaned.replace('\u2212', '-').strip('€$£')

    if ',' in cleaned and '.' in cleaned:
        if cleaned.rfind(',') > cleaned.rfind('.'):
            # 1.234,56 -> 1234.56
            cleaned = cleaned.replace('.', '').replace(',', '.')
        else:
            # 1,234.56 -> 1234.56
            cleaned = cleaned.replace(',', '')
    else:
        # A lone comma is a decimal comma: 12,50 -> 12.50
        cleaned = cleaned.replace(',', '.')

    try:
        return float(cleaned)
    except ValueError:
        return 0.0


def process_csv_file(file_path):
    """
    Read one statement CSV and return its rows as transaction dicts.

    Args:
        file_path: Path of the CSV file. The file must have a header row;
            rows with an empty 'Date' cell are skipped.

    Returns:
        A list of dicts with the keys 'year', 'month', 'day', 'date_str',
        'description', 'category', 'amount', 'institution' and 'source'.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    transactions = []
    file_name = os.path.basename(file_path)
    institution = categorize_institution(file_name)

    # 'utf-8-sig' reads plain UTF-8 as well but drops a leading BOM, which
    # would otherwise become part of the first column name and hide 'Date'.
    # newline='' lets the csv module handle embedded line breaks itself.
    with open(file_path, 'r', newline='', encoding='utf-8-sig') as f:
        # restval='' makes short rows yield empty strings instead of None.
        reader = csv.DictReader(f, restval='')

        for row in reader:
            date_str = (row.get('Date') or '').strip()
            if not date_str:
                continue

            # Fall back to the CSV's own name when the row names no source,
            # and use the same value for date parsing and for the record.
            source = row.get('Source') or file_name

            year, month, day = parse_date(date_str, source)

            # Amount may live under different column names.
            # NOTE(review): 'Debit' and 'Credit' values are used with the sign
            # found in the file — confirm the upstream export already encodes
            # the direction of the movement.
            amount_str = row.get('Amount') or row.get('Debit') or row.get('Credit') or ''

            transactions.append({
                'year': year,
                'month': month,
                'day': day,
                'date_str': date_str,
                'description': row.get('Description', ''),
                'category': row.get('Category', 'Other'),
                'amount': _parse_amount(amount_str),
                'institution': institution,
                'source': source,
            })

    return transactions

def _write_monthly_summary(summary_file, monthly_transactions):
    """Write one row of totals per (year, month) key to ``summary_file``."""
    with open(summary_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow([
            'Year', 'Month', 'Total Income', 'Total Expenses', 'Net Balance',
            'Transaction Count', 'Institutions'
        ])

        for (year, month) in sorted(monthly_transactions):
            transactions = monthly_transactions[(year, month)]

            # NOTE(review): negative amounts are counted as income and
            # positive amounts as expenses — confirm this sign convention
            # holds for every institution's export.
            total_income = sum(t['amount'] for t in transactions if t['amount'] < 0)
            total_expenses = sum(t['amount'] for t in transactions if t['amount'] > 0)
            institutions = sorted({t['institution'] for t in transactions})

            writer.writerow([
                year, calendar.month_name[month], total_income, total_expenses,
                total_income + total_expenses, len(transactions),
                ', '.join(institutions)
            ])


def _write_monthly_details(output_dir, monthly_transactions):
    """Write one transactions_<year>_<month>.csv per month; return the file names."""
    written = []
    for (year, month) in sorted(monthly_transactions):
        month_name = calendar.month_name[month].lower()
        file_name = f'transactions_{year}_{month_name}.csv'

        with open(os.path.join(output_dir, file_name), 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=[
                'Date', 'Description', 'Category', 'Amount',
                'Institution', 'Source'
            ])
            writer.writeheader()

            # Order by day, then description; a missing description sorts
            # as empty text so None never gets compared with a string.
            ordered = sorted(
                monthly_transactions[(year, month)],
                key=lambda t: (t['day'], t['description'] or ''),
            )
            for transaction in ordered:
                writer.writerow({
                    'Date': transaction['date_str'],
                    'Description': transaction['description'],
                    'Category': transaction['category'],
                    'Amount': transaction['amount'],
                    'Institution': transaction['institution'],
                    'Source': transaction['source']
                })

        written.append(file_name)
    return written


def _write_yearly_summary(yearly_file, all_transactions):
    """Write one row of income, expense and count totals per year to ``yearly_file``."""
    yearly_summary = defaultdict(lambda: {'income': 0, 'expenses': 0, 'count': 0})
    for transaction in all_transactions:
        totals = yearly_summary[transaction['year']]
        totals['count'] += 1
        # Same sign convention as the monthly summary; zero amounts land in
        # 'expenses' and leave the total unchanged.
        if transaction['amount'] < 0:
            totals['income'] += transaction['amount']
        else:
            totals['expenses'] += transaction['amount']

    with open(yearly_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Year', 'Total Income', 'Total Expenses', 'Net Balance', 'Transaction Count'])

        for year in sorted(yearly_summary):
            data = yearly_summary[year]
            writer.writerow([
                year, data['income'], data['expenses'],
                data['income'] + data['expenses'], data['count']
            ])


def main():
    """
    Aggregate every CSV statement in the input directory into report files.

    Reads the command-line options --input-dir and --output-dir, loads all
    '.csv' files from the input directory, and writes to the output directory:
    monthly_summary.csv, yearly_summary.csv and one
    transactions_<year>_<month>.csv file per month that has data. Progress
    and a list of generated files are printed to standard output.

    Returns None. If the input directory cannot be read or holds no CSV
    files, an error is printed and no report is written. A CSV file that
    cannot be read is reported and skipped.
    """
    parser = argparse.ArgumentParser(description='Aggregate all account statements by month')
    parser.add_argument('--input-dir', default='output/csv',
                        help='Directory containing CSV files to aggregate (default: output/csv)')
    parser.add_argument('--output-dir', default='output/reports',
                        help='Directory to save aggregated reports (default: output/reports)')

    args = parser.parse_args()

    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)

    print(f"\n{'='*60}")
    print(f"Monthly Aggregation of All Account Statements")
    print(f"Input Directory: {os.path.abspath(args.input_dir)}")
    print(f"Output Directory: {os.path.abspath(args.output_dir)}")
    print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*60}")

    # Find all CSV files in the input directory. A missing or unreadable
    # directory is reported instead of ending in a traceback.
    try:
        entries = os.listdir(args.input_dir)
    except OSError as exc:
        print(f"\nError: Cannot read input directory {args.input_dir}: {exc}")
        return

    # Sorted so runs are reproducible regardless of filesystem order.
    csv_files = sorted(name for name in entries if name.lower().endswith('.csv'))

    if not csv_files:
        print(f"\nError: No CSV files found in {args.input_dir}")
        return

    # Process each CSV file; one bad file must not abort the whole run.
    all_transactions = []
    for csv_file in csv_files:
        file_path = os.path.join(args.input_dir, csv_file)
        print(f"\nProcessing: {csv_file}")
        try:
            transactions = process_csv_file(file_path)
        except (OSError, UnicodeDecodeError, csv.Error) as exc:
            print(f"  Skipped: could not read file ({exc})")
            continue
        all_transactions.extend(transactions)
        print(f"  Found {len(transactions)} transactions")

    # Group transactions by (year, month)
    monthly_transactions = defaultdict(list)
    for transaction in all_transactions:
        monthly_transactions[(transaction['year'], transaction['month'])].append(transaction)

    # Write the three kinds of report
    summary_file = os.path.join(args.output_dir, 'monthly_summary.csv')
    _write_monthly_summary(summary_file, monthly_transactions)

    detail_files = _write_monthly_details(args.output_dir, monthly_transactions)

    yearly_file = os.path.join(args.output_dir, 'yearly_summary.csv')
    _write_yearly_summary(yearly_file, all_transactions)

    # Print summary statistics
    print(f"\n{'='*60}")
    print(f"Aggregation Complete")
    print(f"Total Transactions: {len(all_transactions)}")
    print(f"Months with Data: {len(monthly_transactions)}")
    print(f"{'='*60}")

    # List generated files with their sizes
    generated_files = [
        os.path.basename(summary_file),
        os.path.basename(yearly_file),
    ] + detail_files

    print("\nGenerated Files:")
    for file in generated_files:
        file_path = os.path.join(args.output_dir, file)
        if os.path.exists(file_path):
            file_size = os.path.getsize(file_path)
            print(f"  - {file} ({file_size:,} bytes)")

# Run the aggregation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()