- Fix SNCF NET PAYÉ EN EUROS extraction to correctly parse MENSUEL line - Extract month/year from PDF content instead of filename - Add new Revolut CSV processor to aggregate account statements - Organize Revolut data files into data/csv/revolut/ - Clean up redundant scripts and reports
152 lines
5.9 KiB
Python
Executable File
152 lines
5.9 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import re
|
|
import csv
|
|
import os
|
|
from collections import defaultdict
|
|
|
|
def categorize_bourso_transaction(description):
    """Map a Boursobank transaction label to a spending category.

    Matching is case-insensitive. Substring rules are tried in the order
    listed and the first hit wins; a label that hits none of them is a
    ``'Card Payment'`` when it starts with ``carte`` and ``'Other'``
    otherwise.

    Args:
        description: Raw transaction label as it appears on the statement.

    Returns:
        The category name as a string.
    """
    label = description.lower()

    # (keywords, category): a rule hits when ANY keyword occurs in the label.
    # Order matters: e.g. a "carte ... paypal" label must resolve to the
    # Paypal category before the generic card-payment fallback below.
    substring_rules = (
        (('ech pret',), 'Loan Repayment'),
        (('american express',), 'Credit Card Payment (Amex)'),
        (('orange sa', 'sfr', 'ste reunionnaise du radiotelep'), 'Utilities'),
        (('be rock',), 'Subscription (BE ROCK)'),
        (('paypal',), 'Online Purchases (Paypal)'),
        (('vir virement interne',), 'Internal Transfer'),
        (('retrait dab',), 'Cash Withdrawal'),
    )
    for keywords, category in substring_rules:
        if any(keyword in label for keyword in keywords):
            return category

    return 'Card Payment' if label.startswith('carte') else 'Other'
|
|
|
|
def _parse_bourso_amount(raw):
    """Convert an amount string captured from a statement line into a float.

    Handles a decimal comma ("12,50"), a plain decimal point ("12.50") and
    grouped amounts where both separators appear ("1.234,56" or "1,234.56");
    in the grouped case the right-most separator is taken as the decimal
    mark and the other one is dropped.

    Raises:
        ValueError: if the text is still not a valid number after cleanup.
    """
    text = raw.strip()
    last_comma = text.rfind(',')
    last_dot = text.rfind('.')
    if last_comma != -1 and last_dot != -1:
        if last_comma > last_dot:
            # "1.234,56": dots group thousands, comma is the decimal mark.
            text = text.replace('.', '').replace(',', '.')
        else:
            # "1,234.56": commas group thousands, dot is the decimal mark.
            text = text.replace(',', '')
    else:
        text = text.replace(',', '.')
    return float(text)


def process_bourso_statement(file_path, output_csv=False, output_dir='../../output/csv'):
    """Parse one text-converted Boursobank statement and summarise its expenses.

    The file is scanned for transaction lines (operation date, label, value
    date, then up to two amounts). Every matched line is categorised, echoed
    to stdout, and collected; debits outside the 'Internal Transfer' category
    are totalled per category and printed as a summary.

    Args:
        file_path: Path to the UTF-8 text version of the statement.
        output_csv: When true, also write all collected rows to
            ``boursobank_all_transactions.csv`` inside ``output_dir``.
        output_dir: Directory for the CSV file; created if missing.

    Returns:
        A list of dicts with keys 'Date', 'Description', 'Category',
        'Debit', 'Credit' and 'Value Date', one per matched transaction.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    expense_summary = defaultdict(float)
    total_expenses = 0
    transactions_data = []  # Every matched row, kept for the optional CSV export

    # Groups: operation date, label, value date, first amount, second amount.
    # NOTE(review): when a line carries a single amount it always lands in the
    # first (debit) group, because the pattern cannot tell the debit column
    # from the credit column — confirm against real statements.
    transaction_regex = re.compile(r"^ (\d{2}/\d{2}/\d{4})\s+(.*?)\s+(\d{2}/\d{2}/\d{4})\s+([\d,.]+\s*)?([\d,.]+\s*)?$", re.MULTILINE)

    print("--- Matched Transactions ---")
    for op_date, description, val_date, debit_str, credit_str in transaction_regex.findall(content):
        description = description.strip()

        debit = 0
        if debit_str:
            try:
                debit = _parse_bourso_amount(debit_str)
            except ValueError:
                continue  # Skip if debit is not a valid number

        # The second captured amount used to be thrown away; record it so the
        # 'Credit' column reflects what was on the line. An unparsable value
        # falls back to 0 rather than discarding an otherwise valid row.
        credit = 0
        if credit_str:
            try:
                credit = _parse_bourso_amount(credit_str)
            except ValueError:
                credit = 0

        category = categorize_bourso_transaction(description)
        print(f"Found: {description} -> {category} -> {debit}")  # DEBUG

        transactions_data.append({
            'Date': op_date,
            'Description': description,
            'Category': category,
            'Debit': debit,
            'Credit': credit,
            'Value Date': val_date,
        })

        # Internal transfers move money between own accounts, so they are
        # listed but excluded from the expense totals.
        if debit > 0 and category != 'Internal Transfer':
            expense_summary[category] += debit
            total_expenses += debit

    if output_csv:
        csv_file = os.path.join(output_dir, 'boursobank_all_transactions.csv')
        os.makedirs(output_dir, exist_ok=True)
        with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['Date', 'Description', 'Category', 'Debit', 'Credit', 'Value Date']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(transactions_data)
        print(f"\nTransaction data saved to {csv_file}")

    # NOTE: the period in this heading is fixed text; it is not derived from
    # the statement being processed.
    print("\n--- Boursobank Expense Summary (Dec 2025) - Final ---")
    print(f"Total Expenses Analyzed: €{total_expenses:,.2f}")
    print("\n--- Spending by Category ---")

    sorted_expenses = sorted(expense_summary.items(), key=lambda item: item[1], reverse=True)

    if total_expenses > 0:
        for category, total in sorted_expenses:
            percentage = (total / total_expenses) * 100
            print(f"{category:<25} €{total:9,.2f} ({percentage:5.2f}%)")
    else:
        print("No expenses found.")

    return transactions_data
|
|
|
|
def process_bourso_pdf_files(directory, output_csv=False, output_dir='../../output/csv'):
    """Convert every PDF statement in a directory to text and process it.

    Each ``*.pdf`` in ``directory`` is run through the external ``pdftotext``
    tool (layout mode); the resulting text is handed to
    ``process_bourso_statement`` and the returned rows are accumulated.

    Args:
        directory: Folder that is searched (non-recursively) for ``*.pdf``.
        output_csv: When true, write the accumulated rows to
            ``boursobank_all_transactions.csv`` inside ``output_dir``.
        output_dir: Directory for the CSV file; created if missing.

    Returns:
        The list of transaction dicts gathered from all PDFs that could be
        processed. A PDF whose conversion fails, or a missing ``pdftotext``
        binary, is reported on stdout and skipped.
    """
    import glob
    import subprocess
    import tempfile

    # Sorted so that the row order of the result does not depend on the
    # arbitrary order in which the filesystem lists the directory.
    pdf_files = sorted(glob.glob(os.path.join(directory, "*.pdf")))
    all_transactions = []

    for pdf_file in pdf_files:
        try:
            # pdftotext emits UTF-8 unless told otherwise, so decode it as
            # such instead of relying on the locale's preferred encoding.
            result = subprocess.run(['pdftotext', '-layout', pdf_file, '-'],
                                    capture_output=True, text=True,
                                    encoding='utf-8', check=True)

            # Use a real temporary file rather than "<pdf name>.txt" next to
            # the PDF: the latter would overwrite and then delete any
            # existing file of that name.
            fd, temp_file = tempfile.mkstemp(suffix='.txt')
            try:
                with os.fdopen(fd, 'w', encoding='utf-8') as f:
                    f.write(result.stdout)

                # Each call with output_csv set rewrites the same CSV path;
                # the consolidated write below replaces it at the end.
                transactions = process_bourso_statement(temp_file, output_csv, output_dir)
            finally:
                # Always remove the scratch file, even if processing raised.
                os.remove(temp_file)

            all_transactions.extend(transactions)

        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            print(f"Error processing {pdf_file}: {e}")
            continue

    # Output consolidated CSV if requested
    if output_csv and all_transactions:
        csv_file = os.path.join(output_dir, 'boursobank_all_transactions.csv')
        os.makedirs(output_dir, exist_ok=True)
        with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['Date', 'Description', 'Category', 'Debit', 'Credit', 'Value Date']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_transactions)
        print(f"\nAll transaction data saved to {csv_file}")

    return all_transactions
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: read the options, then process every PDF
    # statement found in the chosen directory.
    import argparse

    cli = argparse.ArgumentParser(description='Process Boursobank statements')

    # (flag, keyword arguments for add_argument), registered in this order.
    option_specs = (
        ('--pdf-dir', {
            'default': '../data/pdf/boursobank',
            'help': 'Directory containing Boursobank PDF files',
        }),
        ('--output-dir', {
            'default': '../../output/csv',
            'help': 'Directory to save CSV output files',
        }),
        ('--csv', {
            'action': 'store_true',
            'help': 'Output transaction data to CSV files',
        }),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)

    options = cli.parse_args()

    # Process all PDF files in the directory
    process_bourso_pdf_files(options.pdf_dir, options.csv, options.output_dir)
|