Files
personnal-accounting/scripts/quality_control.py

1 line
4.9 KiB
Python

[["pdftotext", "-layout", "file_path, '-'], capture_output=True, text=True, check=True)\n content = result.stdout\n except (subprocess.CalledProcessError, FileNotFoundError) as e:\n print(f\"Error processing {file_path}: {e}", "return\n \n lines = content.split('\n')\n expense_lines = [line for line in lines if re.match(r'd{1,2} w{3}', line) and not line.endswith('CR')]\n \n print(\"Sample transaction lines from January PDF:", "for line in expense_lines[:10]:\n print(f\" {line}\")\n \n print(f\"\nTotal expense-like lines in January: {len(expense_lines)}", "Calculate manual total of first few transactions\n manual_total = 0\n for line in expense_lines[:5]:\n parts = line.split()\n if len(parts) > 3:\n try:\n amount_str = parts[-1].replace(',', '.')\n amount = float(amount_str)\n manual_total += amount\n description = ' '.join(parts[2:-1])\n print(f\" Found: {description} -> \u20ac{amount}", "except (ValueError, IndexError):\n continue\n \n print(f\"\nManual sum of first 5 transactions: \u20ac{manual_total:.2f}\")\n\ndef check_monabanq_quality():\n print(\"\n=== MONABANQ QUALITY CONTROL ===\")\n \n file_path = \"/home/acid/Downloads/comptabilite/monabanq/Extrait de comptes au 2025-01-31.pdf", "try:\n result = subprocess.run(['pdftotext', '-layout', file_path, '-'], capture_output=True, text=True, check=True)\n content = result.stdout\n except (subprocess.CalledProcessError, FileNotFoundError) as e:\n print(f\"Error processing {file_path}: {e}\")\n return\n \n lines = content.split('\n')\n transaction_started = False\n debit_total = 0\n debit_count = 0\n \n for line in lines:\n if \"SOLDE CREDITEUR AU\" in line or \"SOLDE DEBITEUR AU\" in line:\n transaction_started = True\n continue\n if not transaction_started or not line.strip():\n continue\n if \"IBAN :", "in line:\n break\n\n match = re.match(r's*(d{2}/d{2}/d{4})s+d{2}/d{2}/d{4}s+(.*?)(?=s{2,}|$)(s+[d,.]+)?(s+[d,.]+)?', line)\n if match:\n op_date, description, debit_str, credit_str = match.groups()\n description = description.strip()\n \n if debit_str:\n try:\n debit = float(debit_str.strip().replace(',', '.'))\n print(f\" Found debit: {description} -> \u20ac{debit}", "debit_total += debit\n debit_count += 1\n except (ValueError, AttributeError):\n continue\n \n print(f\"\nDebit transactions in January: {debit_count}\")\n print(f\"Manual total of debits: \u20ac{debit_total:.2f}\")\n\ndef check_revolut_quality():\n print(\"\n=== REVOLUT QUALITY CONTROL ===\")\n \n file_path = \"/home/acid/Downloads/comptabilite/revolut/account-statement_2025-01-01_2025-01-31_en-us_58f89a.csv", "try:\n with open(file_path, 'r', encoding='utf-8') as f:\n reader = csv.DictReader(f)\n negative_count = 0\n negative_total = 0\n sample_transactions = []\n \n for row in reader:\n try:\n amount = float(row['Amount'])\n currency = row['Currency']\n \n if currency == 'EUR' and amount < 0:\n negative_count += 1\n negative_total += abs(amount)\n if len(sample_transactions) < 10:\n sample_transactions.append((row['Description'], abs(amount)))\n except (ValueError, KeyError):\n continue\n \n print(\"Sample negative transactions from January:", "for desc, amount in sample_transactions[:10]:\n print(f\" {desc}: \u20ac{amount:.2f}\")\n \n print(f\"\nTotal expense transactions in January: {negative_count}\")\n print(f\"Manual total of expenses: \u20ac{negative_total:.2f}\")\n except FileNotFoundError:\n print(f\"CSV file not found: {file_path}\")\n\ndef check_bourso_quality():\n print(\"\n=== BOURSOBANK QUALITY CONTROL ===\")\n \n statement_lines = [\n \"PRLV SEPA ORANGE SA-ORANGE -> 18.96", "CARTE 27/11/25 ESPACE YOYO -> 3.00", "PRLV SEPA American Express -> 402.48"], {"__main__": "check_amex_quality()\n check_monabanq_quality()\n check_revolut_quality()\n check_bourso_quality()\n \n print(", ")\n print(\"1. All scripts appear to be processing data from their sources\")\n print(": ".", "methods": ")\n print(", "Amex": "Processing monthly statements with transaction extraction", "print(": "Boursobank: Using hardcoded statement text", "Other": "ategories suggest need for improved transaction categorization"}]