Initial commit with CSV export functionality for all financial statement processing scripts

2026-02-09 10:08:43 +01:00
commit acb1276b38
106 changed files with 13858 additions and 0 deletions
--- a/process_sncf.py
+++ b/process_sncf.py
@@ -0,0 +1,85 @@
+import subprocess
+import re
+import csv
+import os
+import glob
+from collections import defaultdict
+
+def categorize_sncf_transaction(description):
+    """Map a payslip line description to a coarse category.
+
+    Matching is case-insensitive and accent-insensitive, so French labels
+    such as "Impôt sur le revenu" match the unaccented keyword "impot".
+    Rules are checked in order and the first matching rule wins.
+
+    Args:
+        description: Free-text label of a payslip line.
+
+    Returns:
+        One of 'Salary', 'Bonus/Prime', 'Deductions', 'Tax', 'Benefits',
+        or 'Other' when no keyword matches.
+    """
+    # Imported locally because the file's import block does not provide it.
+    import unicodedata
+
+    # Decompose accented characters and drop the combining marks so that
+    # "impôt" and "impot" compare equal.
+    decomposed = unicodedata.normalize('NFD', description.lower())
+    normalized = ''.join(
+        ch for ch in decomposed if not unicodedata.combining(ch)
+    )
+
+    # Ordered rules: earlier entries take precedence over later ones.
+    rules = (
+        (('salaire',), 'Salary'),
+        (('prime',), 'Bonus/Prime'),
+        (('cotisation', 'retenue'), 'Deductions'),
+        (('impot',), 'Tax'),
+        (('avantage',), 'Benefits'),
+    )
+    for keywords, category in rules:
+        if any(keyword in normalized for keyword in keywords):
+            return category
+
+    return 'Other'
+
+def process_sncf_pdf_files(directory, output_csv=False):
+    """Build one placeholder salary record per SNCF payslip PDF in a directory.
+
+    Every ``*.pdf`` file directly inside *directory* is converted to text
+    with the external ``pdftotext`` tool. Files whose conversion fails
+    (tool missing or non-zero exit status) are reported on stdout and
+    skipped. For each converted file one record is produced; amounts are
+    not parsed yet and are always 0, and the year is fixed to 2025.
+
+    Args:
+        directory: Folder containing the payslip PDF files.
+        output_csv: When true and at least one record was produced, write
+            the records to ``sncf_all_transactions.csv`` inside *directory*.
+
+    Returns:
+        A list of dicts with the keys 'Date', 'Description', 'Category',
+        'Amount' and 'Source', ordered by file path.
+    """
+    # glob returns matches in arbitrary order; sort so that the records and
+    # the CSV rows are reproducible from run to run.
+    pdf_files = sorted(glob.glob(os.path.join(directory, "*.pdf")))
+    all_transactions = []
+
+    for pdf_file in pdf_files:
+        source_name = os.path.basename(pdf_file)
+
+        # Only the conversion itself can raise the handled exceptions.
+        try:
+            # pdftotext emits UTF-8 by default, so decode explicitly instead
+            # of relying on the locale; undecodable bytes are replaced
+            # rather than aborting the whole run.
+            result = subprocess.run(
+                ['pdftotext', '-layout', pdf_file, '-'],
+                capture_output=True, text=True, check=True,
+                encoding='utf-8', errors='replace',
+            )
+        except (subprocess.CalledProcessError, FileNotFoundError) as e:
+            print(f"Error processing {pdf_file}: {e}")
+            continue
+
+        # The month is the third space-separated token of the file name,
+        # used only when the text mentions "salaire de". The extension is
+        # removed first so a three-word name does not yield "xxx.pdf".
+        month = "Unknown"
+        mentions_salary = any(
+            'salaire de' in line.lower()
+            for line in result.stdout.split('\n')
+        )
+        if mentions_salary:
+            name_parts = os.path.splitext(source_name)[0].split(' ')
+            if len(name_parts) > 2:
+                month = name_parts[2]
+
+        # Placeholder record: simplified date, amount not parsed yet.
+        all_transactions.append({
+            'Date': f"01/{month}/2025",
+            'Description': f"Salaire {month} 2025",
+            'Category': 'Salary',
+            'Amount': 0,
+            'Source': source_name,
+        })
+
+    # Output CSV if requested and there is something to write.
+    if output_csv and all_transactions:
+        csv_file = os.path.join(directory, 'sncf_all_transactions.csv')
+        with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
+            fieldnames = ['Date', 'Description', 'Category', 'Amount', 'Source']
+            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+            writer.writeheader()
+            writer.writerows(all_transactions)
+        print(f"\nTransaction data saved to {csv_file}")
+
+    print("--- SNCF Salary Statements ---")
+    print(f"Found {len(pdf_files)} salary statement files")
+
+    return all_transactions
+
+if __name__ == "__main__":
+    # Command-line entry point: read the options, then process the folder.
+    import argparse
+
+    cli = argparse.ArgumentParser(description='Process SNCF salary statements')
+    cli.add_argument(
+        '--pdf-dir',
+        default='1-sncf',
+        help='Directory containing SNCF PDF files',
+    )
+    cli.add_argument(
+        '--csv',
+        action='store_true',
+        help='Output transaction data to CSV files',
+    )
+    options = cli.parse_args()
+
+    # Handle every PDF found in the chosen directory.
+    process_sncf_pdf_files(options.pdf_dir, options.csv)