Files
personnal-accounting/finanancial_processor.py
Kevin Bataille eb66c7a43e Refactor SNCF processor and add Revolut aggregator
- Fix SNCF NET PAYÉ EN EUROS extraction to correctly parse MENSUEL line
- Extract month/year from PDF content instead of filename
- Add new Revolut CSV processor to aggregate account statements
- Organize Revolut data files into data/csv/revolut/
- Clean up redundant scripts and reports
2026-02-09 16:17:48 +01:00

188 lines
6.1 KiB
Python

#!/usr/bin/env python3
"""
Main script to process all financial statements with a clean one-file-per-entity structure
"""
import os
import sys
import glob
import subprocess
import argparse
from datetime import datetime
# Import functionality from the dynamic processor
# Add the directory to the path so we can import it
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from dynamic_processor import discover_pdf_directories, process_dynamic_pdf_files
def main():
    """
    Command-line entry point: discover statement directories and process them.

    Parses the CLI options, resolves the data and output directories relative
    to the project root (the parent of this script's directory), asks
    ``discover_pdf_directories`` for the available account types and then
    dispatches every supported account type to its processor.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    parser = argparse.ArgumentParser(description='Process financial statements with one file per entity')
    parser.add_argument('--data-dir',
                        help='Base directory containing PDF files (default: <project root>/data/pdf)')
    parser.add_argument('--output-dir', default=None,
                        help='Directory to save CSV output files (default: <project root>/output/csv)')
    parser.add_argument('--csv', action='store_true',
                        help='Generate CSV output files')
    parser.add_argument('--single', action='store_true',
                        help='Process only the current entity (for testing)')
    args = parser.parse_args()

    # Get paths
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)

    # Determine data directory: a relative --data-dir is anchored at the
    # project root, not at the current working directory.
    if args.data_dir:
        data_dir = args.data_dir
        if not os.path.isabs(data_dir):
            data_dir = os.path.join(project_root, data_dir)
    else:
        data_dir = os.path.join(project_root, 'data/pdf')

    # Set output directory (an explicit --output-dir is used exactly as given)
    output_dir = args.output_dir or os.path.join(project_root, 'output/csv')
    os.makedirs(output_dir, exist_ok=True)

    banner = '=' * 60
    print(f"\n{banner}")
    print("Financial Statement Processor")
    print(f"Data Directory: {os.path.abspath(data_dir)}")
    print(f"Output Directory: {os.path.abspath(output_dir)}")
    print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(banner)

    # Discover all PDF directories
    pdf_dirs = discover_pdf_directories(data_dir)
    if not pdf_dirs:
        print("No directories with PDF files found!")
        return

    print(f"\nDiscovered {len(pdf_dirs)} directories with PDF files:")
    for account_type, info in pdf_dirs.items():
        print(f"  - {account_type}: {info['count']} files in {info['path']}")

    # 'Revolut' must be part of the supported set, otherwise the Revolut
    # branch below can never be reached.
    supported_types = {
        'Boursobank', 'American Express', 'Monabanq', 'SNCF', 'La Poste', 'Revolut',
    }

    # Process each account type to its own file
    for account_type, info in pdf_dirs.items():
        if account_type not in supported_types:
            continue  # Skip unsupported types
        if account_type == 'Revolut':
            # Special case for Revolut (CSV files)
            process_revolut(data_dir, output_dir, args.csv, args.single)
        else:
            process_pdf_account(account_type, info, output_dir, args.csv, args.single)

    print(f"\n{banner}")
    print("Processing Complete")
def process_revolut(data_dir, output_dir, generate_csv, single_mode=False):
    """Process Revolut CSV files by delegating to ``process_expenses.py``.

    The CSV files are looked up in a ``raw_csv`` directory that sits next to
    ``data_dir``. Every file found is listed on stdout, then
    ``process_expenses.py`` (located beside this script) is run once for the
    whole directory.

    Args:
        data_dir: Base PDF data directory; only its parent is used here.
        output_dir: Directory passed to the child script as ``--output-dir``.
        generate_csv: When true, ``--csv`` is forwarded to the child script.
        single_mode: When true, ``--single`` is forwarded to the child script.

    Returns:
        None. Child output and failures are printed to stdout.
    """
    # normpath drops a trailing separator so that dirname() really yields the
    # parent directory even for inputs such as "data/pdf/".
    csv_dir = os.path.join(os.path.dirname(os.path.normpath(data_dir)), 'raw_csv')

    # Sorted so the listing is stable
    csv_files = sorted(glob.glob(os.path.join(csv_dir, "*.csv")))
    if not csv_files:
        print(f"No Revolut CSV files found in {csv_dir}")
        return

    for csv_file in csv_files:
        print(f"Processing Revolut CSV: {os.path.basename(csv_file)}")

    # The command takes the directory, not an individual file, so it is run
    # once rather than once per CSV.
    cmd = [
        sys.executable,
        os.path.join(os.path.dirname(os.path.abspath(__file__)), 'process_expenses.py'),
        '--csv-dir', csv_dir,
        '--output-dir', output_dir,
    ]
    if generate_csv:
        cmd.append('--csv')
    if single_mode:
        cmd.append('--single')

    try:
        # text=True decodes the captured streams so they print as text, not b'...'
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error processing {csv_dir}: {e}")
        if e.stderr:
            # Surface the child's diagnostics instead of discarding them
            print(e.stderr)
        return

    if result.stdout:
        print(result.stdout)
def process_pdf_account(account_type, info, output_dir, generate_csv, single_mode=False):
    """Run the dedicated processor script for a PDF-based account.

    The processor is ``process_<slug>.py`` in this script's directory, where
    ``<slug>`` is ``account_type`` lower-cased with spaces replaced by
    underscores (e.g. ``'La Poste'`` -> ``process_la_poste.py``). It is
    invoked directly with the current interpreter; no intermediate wrapper
    script is written to disk, so nothing has to be cleaned up afterwards.

    Args:
        account_type: Account name used to derive the processor script name.
        info: Mapping describing the account; its ``'path'`` entry is passed
            to the child script as ``--pdf-dir``.
        output_dir: Directory passed to the child script as ``--output-dir``.
        generate_csv: When true, ``--csv`` is forwarded to the child script.
        single_mode: When true, ``--single`` is forwarded to the child script.

    Returns:
        None. Child output and failures are printed to stdout.
    """
    slug = account_type.lower().replace(' ', '_')
    script_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), f"process_{slug}.py"
    )

    cmd = [sys.executable, script_path, '--pdf-dir', info['path'], '--output-dir', output_dir]
    if generate_csv:
        cmd.append('--csv')
    if single_mode:
        # NOTE(review): assumes process_<slug>.py accepts --single - confirm
        cmd.append('--single')

    # Only the arguments are shown; interpreter and script path are omitted
    print(f"Running: {' '.join(cmd[2:])}")

    try:
        # text=True decodes the captured streams so they print as text, not b'...'
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error: {e}")
        if e.stderr:
            # Surface the child's diagnostics instead of discarding them
            print(e.stderr)
        return

    if result.stdout:
        print(result.stdout)
# Run the processing pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()