#!/usr/bin/env python3
"""
Fully dynamic script to auto-discover and process all financial statements.

Every sub-directory of the data directory that contains at least one
``*.pdf`` file is classified by name into an account type, and the matching
``process_*.py`` script (expected next to this file) is run on it as a
child process.
"""

import os
import subprocess
import sys
import glob
import re
import argparse
from datetime import datetime


# Ordered classification rules: (keywords, account type, processing script).
# The first rule with a keyword contained in the lower-cased directory name
# wins.  A script of None means the account type is recognised but skipped.
_ACCOUNT_RULES = (
    (('boursobank', 'releve-compte'), 'Boursobank', 'process_bourso.py'),
    (('american_express', 'amex'), 'American Express', 'process_amex.py'),
    (('monabanq', 'extrait'), 'Monabanq', 'process_monabanq.py'),
    (('sncf', 'salaire'), 'SNCF', 'process_sncf_improved.py'),
    (('la_poste', 'la-poste', 'releve_ccp'), 'La Poste',
     'process_laposte_improved.py'),
    (('impots', 'impot'), 'Impôts', None),  # Skip tax documents
)


def _classify_directory(dir_name):
    """Return ``(account_type, script_name)`` for a data sub-directory name."""
    lowered = dir_name.lower()
    for keywords, account_type, script_name in _ACCOUNT_RULES:
        if any(keyword in lowered for keyword in keywords):
            return account_type, script_name

    # Unknown directory: derive both the label and the script name from it.
    account_type = dir_name.replace('_', ' ').title()
    script_name = f'process_{account_type.lower().replace(" ", "_")}.py'
    return account_type, script_name


def _resolve_dir(project_root, user_value, default_relative):
    """Return ``user_value`` (or the default) anchored at ``project_root``.

    Absolute paths are returned unchanged; relative ones are joined onto
    the project root.
    """
    chosen = user_value or default_relative
    if os.path.isabs(chosen):
        return chosen
    return os.path.join(project_root, chosen)


def _discover_pdf_dirs(data_dir):
    """Return one entry per sub-directory of ``data_dir`` that holds PDFs.

    Each entry is a dict with the keys ``account_type``, ``label``,
    ``path``, ``count``, ``files`` and ``script``.  Entries are kept in a
    list (not a dict keyed by account type) so that several directories
    mapping to the same account type are all processed instead of silently
    overwriting one another.
    """
    entries = []
    # Sorted so the processing order is deterministic across runs.
    for item in sorted(os.listdir(data_dir)):
        dir_path = os.path.join(data_dir, item)
        if not os.path.isdir(dir_path):
            continue

        # Escape the directory part so names containing glob
        # metacharacters ('[', '*', '?') are matched literally.
        pattern = os.path.join(glob.escape(dir_path), "*.pdf")
        pdf_files = sorted(glob.glob(pattern))
        if not pdf_files:
            continue

        account_type, script_name = _classify_directory(item)
        entries.append({
            'account_type': account_type,
            'label': account_type,
            'path': dir_path,
            'count': len(pdf_files),
            'files': pdf_files,
            'script': script_name,
        })

    # Disambiguate the display label when an account type occurs more than
    # once, by appending the directory name.
    occurrences = {}
    for entry in entries:
        key = entry['account_type']
        occurrences[key] = occurrences.get(key, 0) + 1
    for entry in entries:
        if occurrences[entry['account_type']] > 1:
            dir_name = os.path.basename(entry['path'])
            entry['label'] = f"{entry['account_type']} [{dir_name}]"

    return entries


def _run_processor(cmd, label):
    """Run one processing command and report the outcome.

    Returns True when the child process exits with status 0, else False.
    """
    try:
        # Text mode so the captured output prints as text rather than as a
        # bytes repr; undecodable bytes are replaced instead of raising.
        result = subprocess.run(cmd, capture_output=True, text=True,
                                errors='replace')
    except OSError as exc:
        # The interpreter itself could not be launched.
        print(f"✗ Error processing {label}: {exc}")
        return False

    if result.stdout:
        print(result.stdout.rstrip('\n'))

    if result.returncode != 0:
        print(f"✗ {label} processing failed with exit code "
              f"{result.returncode}")
        if result.stderr:
            # Surface the child's diagnostics; they are captured, so they
            # would otherwise be lost.
            print(result.stderr.rstrip('\n'))
        return False

    print(f"✓ {label} processing completed successfully")
    return True


def main():
    """
    Main function to dynamically discover and process all financial statements.

    Reads ``--data-dir``, ``--output-dir`` and ``--csv`` from the command
    line, discovers the PDF directories, runs the matching processing
    script for each of them and prints a summary.  Returns None.
    """
    parser = argparse.ArgumentParser(
        description='Dynamically process all financial statements')
    parser.add_argument(
        '--data-dir',
        help='Base directory containing PDF files (default: ../data/pdf)')
    parser.add_argument(
        '--output-dir', default=None,
        help='Directory to save CSV output files (default: ../output/csv)')
    parser.add_argument('--csv', action='store_true',
                        help='Generate CSV output files')
    args = parser.parse_args()

    # Get paths
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)

    data_dir = _resolve_dir(project_root, args.data_dir, 'data/pdf')
    output_dir = _resolve_dir(project_root, args.output_dir, 'output/csv')

    # Create output directory if needed
    os.makedirs(output_dir, exist_ok=True)

    print(f"\n{'='*60}")
    print("Dynamic Financial Statement Processor")
    print(f"Data Directory: {os.path.abspath(data_dir)}")
    print(f"Output Directory: {os.path.abspath(output_dir)}")
    print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*60}")

    # isdir (not exists): a regular file here would make listdir raise.
    if not os.path.isdir(data_dir):
        print(f"Error: Data directory not found: {data_dir}")
        return

    pdf_dirs = _discover_pdf_dirs(data_dir)
    if not pdf_dirs:
        print("No directories with PDF files found!")
        return

    print(f"\nDiscovered {len(pdf_dirs)} directories with PDF files:")
    for info in pdf_dirs:
        print(f"  - {info['label']}: {info['count']} PDF files")

    # Process each discovered directory
    success_count = 0
    for info in pdf_dirs:
        label = info['label']
        if not info['script']:
            print(f"\nSkipping {label}: No processing script available")
            continue

        script_path = os.path.join(script_dir, info['script'])
        if not os.path.isfile(script_path):
            # Typically the auto-derived script name of an unknown directory.
            print(f"\nSkipping {label}: Processing script not found: "
                  f"{script_path}")
            continue

        # For Revolut, use CSV directory instead of PDF directory
        is_revolut = info['account_type'] == 'Revolut'
        process_dir = info['path']
        if is_revolut:
            # CSV files are in raw_csv, a sibling of the data directory.
            # normpath drops a trailing separator, which would otherwise
            # make dirname() return the data directory itself.
            process_dir = os.path.join(
                os.path.dirname(os.path.normpath(data_dir)), 'raw_csv')

        if process_dir and not os.path.exists(process_dir):
            print(f"Warning: Directory not found: {process_dir}")
            continue

        # Build command
        cmd = [sys.executable, script_path,
               '--csv-dir' if is_revolut else '--pdf-dir', process_dir,
               '--output-dir', output_dir]
        if args.csv:
            cmd.append('--csv')

        print(f"\nProcessing {label}...")
        print(f"Running: {' '.join(cmd[2:])}")

        if _run_processor(cmd, label):
            success_count += 1

    print(f"\n{'='*60}")
    print(f"Processing Complete: {success_count}/{len(pdf_dirs)} "
          f"accounts processed successfully")
    print(f"CSV files saved to: {os.path.abspath(output_dir)}")
    print(f"{'='*60}")


if __name__ == "__main__":
    main()