Files
personnal-accounting/process_bourso.py

147 lines
5.5 KiB
Python

import re
import csv
import os
from collections import defaultdict
def categorize_bourso_transaction(description):
    """Map a Boursobank transaction label to a spending category.

    The label is lower-cased and checked against an ordered list of keyword
    rules; the first rule with a keyword contained in the label decides the
    category. A label that matches no keyword is a 'Card Payment' when it
    starts with 'carte', and 'Other' otherwise.

    Args:
        description: Transaction label text.

    Returns:
        The category name as a string.
    """
    label = description.lower()

    # Order matters: earlier rules win over later ones.
    keyword_rules = (
        (('ech pret',), 'Loan Repayment'),
        (('american express',), 'Credit Card Payment (Amex)'),
        (('orange sa', 'sfr', 'ste reunionnaise du radiotelep'), 'Utilities'),
        (('be rock',), 'Subscription (BE ROCK)'),
        (('paypal',), 'Online Purchases (Paypal)'),
        (('vir virement interne',), 'Internal Transfer'),
        (('retrait dab',), 'Cash Withdrawal'),
    )
    for keywords, category in keyword_rules:
        if any(keyword in label for keyword in keywords):
            return category

    # Generic card payments are only recognised by their prefix.
    return 'Card Payment' if label.startswith('carte') else 'Other'
def process_bourso_statement(file_path, output_csv=False):
    """Parse a text-converted Boursobank statement and summarise its expenses.

    Every line shaped like " <date> <label> <date> [amount] [amount]" is
    treated as a transaction. The first amount found on the line is used as
    the debit; each transaction is categorised, echoed to stdout, and debits
    are totalled per category (internal transfers excluded). A summary is
    printed at the end.

    Args:
        file_path: Path of the UTF-8 text file to read.
        output_csv: When true, also write the transactions to
            "<input base name>_transactions.csv" in the current working
            directory.

    Returns:
        A list of dicts with the keys 'Date', 'Description', 'Category',
        'Debit', 'Credit' and 'Value Date', one per parsed transaction.

    Notes:
        * The second amount column is captured by the pattern but not used;
          'Credit' is always written as 0.
        * NOTE(review): a line carrying a single amount is always counted as
          a debit, so a credit-only row would presumably be misread as an
          expense -- confirm against real statements.
        * The summary title is hard-coded to "Dec 2025" whatever file is
          processed.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    expense_summary = defaultdict(float)
    total_expenses = 0
    transactions_data = []  # Store all transaction data for CSV output

    # Line shape: one leading space, operation date, label, value date,
    # then up to two optional amount columns.
    transaction_regex = re.compile(r"^ (\d{2}/\d{2}/\d{4})\s+(.*?)\s+(\d{2}/\d{2}/\d{4})\s+([\d,.]+\s*)?([\d,.]+\s*)?$", re.MULTILINE)
    transactions = transaction_regex.findall(content)

    print("--- Matched Transactions ---")
    for op_date, description, val_date, debit_str, _credit_str in transactions:
        description = description.strip()
        debit = 0
        if debit_str:
            try:
                debit = _parse_bourso_amount(debit_str)
            except ValueError:
                # Still skipped, but no longer silently: a dropped row
                # changes the totals, so make it visible.
                print(f"Skipped (unparseable amount {debit_str.strip()!r}): {description}")
                continue

        category = categorize_bourso_transaction(description)
        print(f"Found: {description} -> {category} -> {debit}")  # DEBUG

        # Store transaction data for potential CSV output
        transactions_data.append({
            'Date': op_date,
            'Description': description,
            'Category': category,
            'Debit': debit,
            'Credit': 0,
            'Value Date': val_date
        })

        # Internal transfers move money between own accounts; they are
        # recorded above but kept out of the expense totals.
        if debit > 0 and category != 'Internal Transfer':
            expense_summary[category] += debit
            total_expenses += debit

    # Output CSV if requested
    if output_csv:
        csv_file = os.path.splitext(os.path.basename(file_path))[0] + '_transactions.csv'
        with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['Date', 'Description', 'Category', 'Debit', 'Credit', 'Value Date']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(transactions_data)
        print(f"\nTransaction data saved to {csv_file}")

    print("\n--- Boursobank Expense Summary (Dec 2025) - Final ---")
    print(f"Total Expenses Analyzed: €{total_expenses:,.2f}")
    print("\n--- Spending by Category ---")

    # Largest categories first.
    sorted_expenses = sorted(expense_summary.items(), key=lambda item: item[1], reverse=True)
    if total_expenses > 0:
        for category, total in sorted_expenses:
            percentage = (total / total_expenses) * 100
            print(f"{category:<25}{total:9,.2f} ({percentage:5.2f}%)")
    else:
        print("No expenses found.")

    return transactions_data


def _parse_bourso_amount(amount_str):
    """Convert an amount string such as '45,90' or '1.234,56' to a float.

    When both ',' and '.' are present, the right-most one is taken as the
    decimal mark and the other is dropped as a thousands separator. Strings
    with only one kind of separator are handled as before: commas become
    decimal points and the result is passed to float().

    Raises:
        ValueError: If the cleaned-up text is not a valid number.
    """
    text = amount_str.strip()
    if ',' in text and '.' in text:
        thousands_sep = '.' if text.rfind(',') > text.rfind('.') else ','
        text = text.replace(thousands_sep, '')
    return float(text.replace(',', '.'))
def process_bourso_pdf_files(directory, output_csv=False):
    """Convert every PDF in a directory to text and process each statement.

    Each "*.pdf" file is run through the external `pdftotext -layout`
    command; the resulting text is handed to process_bourso_statement().
    A PDF whose conversion fails (or a missing `pdftotext` executable) is
    reported on stdout and skipped.

    Args:
        directory: Folder that holds the PDF statements.
        output_csv: Forwarded to process_bourso_statement(); when true and at
            least one transaction was found, a consolidated
            "boursobank_all_transactions.csv" is also written into
            `directory`.

    Returns:
        The transactions of all successfully processed PDFs, as one list.
    """
    import glob
    import subprocess
    import tempfile

    # Escape the directory so glob metacharacters in its name are taken
    # literally, and sort so the processing order (and therefore the row
    # order of the consolidated CSV) is reproducible.
    pdf_files = sorted(glob.glob(os.path.join(glob.escape(directory), "*.pdf")))
    all_transactions = []

    for pdf_file in pdf_files:
        try:
            # Convert PDF to text
            result = subprocess.run(['pdftotext', '-layout', pdf_file, '-'],
                                    capture_output=True, text=True, check=True)
            content = result.stdout

            # The statement parser wants a file path. Write the text into a
            # private scratch directory rather than next to the PDF, so an
            # existing "<name>.txt" is never overwritten or deleted, and the
            # scratch file is removed even if processing raises. The base
            # name is kept because the per-file CSV name is derived from it.
            with tempfile.TemporaryDirectory() as scratch_dir:
                text_name = os.path.splitext(os.path.basename(pdf_file))[0] + '.txt'
                temp_file = os.path.join(scratch_dir, text_name)
                with open(temp_file, 'w', encoding='utf-8') as f:
                    f.write(content)

                # Process the text file
                transactions = process_bourso_statement(temp_file, output_csv)

            all_transactions.extend(transactions)
        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            print(f"Error processing {pdf_file}: {e}")
            continue

    # Output consolidated CSV if requested
    if output_csv and all_transactions:
        csv_file = os.path.join(directory, 'boursobank_all_transactions.csv')
        with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['Date', 'Description', 'Category', 'Debit', 'Credit', 'Value Date']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_transactions)
        print(f"\nAll transaction data saved to {csv_file}")

    return all_transactions
if __name__ == "__main__":
    import argparse

    # Script entry point: read the command-line options, then process every
    # PDF statement found in the chosen directory.
    cli = argparse.ArgumentParser(description='Process Boursobank statements')
    cli.add_argument(
        '--pdf-dir',
        default='boursobank',
        help='Directory containing Boursobank PDF files',
    )
    cli.add_argument(
        '--csv',
        action='store_true',
        help='Output transaction data to CSV files',
    )
    options = cli.parse_args()

    process_bourso_pdf_files(directory=options.pdf_dir, output_csv=options.csv)