Refactor SNCF processor and add Revolut aggregator

- Fix SNCF NET PAYÉ EN EUROS extraction to correctly parse MENSUEL line
- Extract month/year from PDF content instead of filename
- Add new Revolut CSV processor to aggregate account statements
- Organize Revolut data files into data/csv/revolut/
- Clean up redundant scripts and reports
This commit is contained in:
Kevin Bataille
2026-02-09 16:17:48 +01:00
parent ef23d066e0
commit eb66c7a43e
85 changed files with 3270 additions and 2106 deletions

12
scripts/process_laposte_improved.py Normal file → Executable file
View File

@@ -1,3 +1,7 @@
#!/usr/bin/env python3
import subprocess
import re
import csv
@@ -54,7 +58,7 @@ def process_laposte_pdf_files(directory, output_csv=False, output_dir='../../out
continue
# Match transaction lines - they have date and amount
- if re.match(r'\s*\d{2}/\d{2}/\d{4}', line):
+ if re.match(r'\s*\d{2}/\d{2}', line):
parts = re.split(r'\s{2,}', line)
if len(parts) >= 3:
try:
@@ -64,9 +68,9 @@ def process_laposte_pdf_files(directory, output_csv=False, output_dir='../../out
# Extract amount (look for numeric values with ¤ or €)
amount = 0
for part in parts[2:]:
- part = part.strip().replace('¤', '').replace('', '')
+ part = part.strip().replace('¤', '').replace('', '').replace(' ', '')
if re.match(r'[\d.,]+', part):
- amount_str = part.replace(' ', '').replace(',', '.')
+ amount_str = part.replace(',', '.')
try:
amount = float(amount_str)
break
@@ -121,4 +125,4 @@ if __name__ == "__main__":
args = parser.parse_args()
# Process all PDF files in the directory
- process_laposte_pdf_files(args.pdf_dir, args.csv, args.output_dir)
+ process_laposte_pdf_files(args.pdf_dir, args.csv, args.output_dir)