import csv
import glob
import os
import re
import subprocess
from collections import defaultdict

# Ordered (keywords, category) rules. The first rule having a keyword that is
# a substring of the lower-cased description wins, so order matters.
# NOTE(review): 'vir' is a plain substring test (it also covers 'virement'),
# so it matches any description merely containing "vir" -- kept as-is to
# preserve the existing categorisation.
_CATEGORY_RULES = (
    (('virement', 'vir'), 'Transfer'),
    (('retrait',), 'Cash Withdrawal'),
    (('carte', 'paiement'), 'Card Payment'),
    (('frais',), 'Bank Fees'),
    (('cotisation',), 'Deductions'),
    # Second keyword is the accented spelling (o-circumflex), written as an
    # escape so this file stays pure ASCII.
    (('impot', 'imp\u00f4t'), 'Tax'),
)

# A transaction line starts (after optional whitespace) with dd/mm/yyyy.
_DATE_PREFIX_RE = re.compile(r'\s*\d{2}/\d{2}/\d{4}')

# A whole token that looks like a number: optional sign, digits, and any
# number of '.'/',' separated digit groups (e.g. 45,30  1.234,56  -12.5).
_AMOUNT_RE = re.compile(r'[+-]?\d+(?:[.,]\d+)*')


def categorize_laposte_transaction(description):
    """Return the category name for a transaction description.

    The description is lower-cased and checked against the keyword rules in
    ``_CATEGORY_RULES`` in order; the first matching rule decides.

    Args:
        description: Free-text transaction label.

    Returns:
        One of 'Transfer', 'Cash Withdrawal', 'Card Payment', 'Bank Fees',
        'Deductions', 'Tax', or 'Other' when no keyword matches.
    """
    description = description.lower()
    for keywords, category in _CATEGORY_RULES:
        if any(keyword in description for keyword in keywords):
            return category
    return 'Other'


def _parse_amount(token):
    """Convert a single whitespace-free token to a float, or return None.

    The *whole* token must look numeric. When a comma is present it is taken
    as the decimal mark and dots as thousands separators ('1.234,56' ->
    1234.56); otherwise the token is parsed as a plain float ('45.30').
    """
    if not _AMOUNT_RE.fullmatch(token):
        return None
    normalized = token
    if ',' in normalized:
        normalized = normalized.replace('.', '').replace(',', '.')
    try:
        return float(normalized)
    except ValueError:
        # e.g. several commas or several dots: not a usable amount.
        return None


def parse_laposte_transaction_line(line):
    """Parse one line of ``pdftotext -layout`` output into a transaction.

    A line is a transaction when it starts with a dd/mm/yyyy date, has more
    than two whitespace-separated tokens, and contains a numeric token after
    the date. The right-most numeric token is the amount; the tokens between
    the date and that amount form the description.

    Amounts whose thousands groups are separated by spaces are split into
    several tokens by this tokenisation; only the last group is then read.

    Args:
        line: A single line of extracted text.

    Returns:
        A dict with keys 'Date', 'Description', 'Category' and 'Amount', or
        None when the line is not a parsable transaction.
    """
    if not _DATE_PREFIX_RE.match(line):
        return None

    tokens = line.split()
    if len(tokens) <= 2:
        return None

    # Scan right-to-left for the amount, never considering the date token.
    for index in range(len(tokens) - 1, 0, -1):
        amount = _parse_amount(tokens[index])
        if amount is not None:
            break
    else:
        return None

    description = ' '.join(tokens[1:index])
    return {
        'Date': tokens[0],
        'Description': description,
        'Category': categorize_laposte_transaction(description),
        'Amount': amount,
    }


def process_laposte_pdf_files(directory, output_csv=False):
    """Extract transactions from every ``*.pdf`` file in *directory*.

    Each PDF is converted to text with the external ``pdftotext`` tool and
    scanned line by line. Files that cannot be converted are reported on
    stdout and skipped. A short summary is printed at the end.

    Args:
        directory: Directory searched (non-recursively) for PDF files.
        output_csv: When true and at least one transaction was found, write
            all of them to ``laposte_all_transactions.csv`` in *directory*.

    Returns:
        A list of dicts with keys 'Date', 'Description', 'Category',
        'Amount' and 'Source' (the PDF's base name).
    """
    # Sorted so the output order is reproducible across runs and platforms.
    pdf_files = sorted(glob.glob(os.path.join(directory, "*.pdf")))
    all_transactions = []

    for pdf_file in pdf_files:
        try:
            # Pin the encoding on both sides so decoding does not depend on
            # the locale; undecodable bytes are replaced instead of raising.
            result = subprocess.run(
                ['pdftotext', '-layout', '-enc', 'UTF-8', pdf_file, '-'],
                capture_output=True,
                text=True,
                encoding='utf-8',
                errors='replace',
                check=True,
            )
        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            print(f"Error processing {pdf_file}: {e}")
            continue

        source = os.path.basename(pdf_file)
        for line in result.stdout.split('\n'):
            transaction = parse_laposte_transaction_line(line)
            if transaction is None:
                continue
            transaction['Source'] = source
            all_transactions.append(transaction)

    # Output CSV if requested
    if output_csv and all_transactions:
        csv_file = os.path.join(directory, 'laposte_all_transactions.csv')
        with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['Date', 'Description', 'Category', 'Amount', 'Source']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_transactions)
        print(f"\nTransaction data saved to {csv_file}")

    print("--- La Poste Account Statements ---")
    print(f"Found {len(pdf_files)} account statement files")
    print(f"Processed {len(all_transactions)} transactions")

    return all_transactions


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='Process La Poste account statements')
    parser.add_argument('--pdf-dir', default='2-la.poste', help='Directory containing La Poste PDF files')
    parser.add_argument('--csv', action='store_true', help='Output transaction data to CSV files')
    args = parser.parse_args()

    # Process all PDF files in the directory
    process_laposte_pdf_files(args.pdf_dir, args.csv)