Add scripts to export all CSV files and aggregate by month
This commit is contained in:
132
scripts/export_all_csv.py
Executable file
132
scripts/export_all_csv.py
Executable file
@@ -0,0 +1,132 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to output CSV files for all account statements
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
|
||||
def run_script(script_path, pdf_dir, output_dir, use_csv_dir=False):
    """Run one statement-processing script as a child process.

    The script is launched with the current interpreter and always receives
    ``--output-dir`` and ``--csv``.

    Args:
        script_path: Path of the Python script to execute.
        pdf_dir: Input directory. Passed as ``--csv-dir`` when
            ``use_csv_dir`` is true, otherwise as ``--pdf-dir``.
        output_dir: Directory handed to the script via ``--output-dir``.
        use_csv_dir: Select the ``--csv-dir`` flag instead of ``--pdf-dir``
            (used for Revolut, which takes CSV input).

    Returns:
        True if the script exited with status 0; False if it exited with a
        non-zero status or could not be launched.
    """
    input_flag = '--csv-dir' if use_csv_dir else '--pdf-dir'
    cmd = [sys.executable, script_path, input_flag, pdf_dir, '--output-dir', output_dir, '--csv']

    # Derive the banner label from the file name only, so absolute paths
    # (which is what main() passes) do not leak into the heading.
    script_name = os.path.splitext(os.path.basename(script_path))[0]
    label = script_name.replace('_', ' ').title()

    print(f"\n{'='*60}")
    print(f"Processing {label} statements...")
    # Flush so the banner appears before the child's own output when stdout
    # is block-buffered (e.g. redirected to a file or pipe).
    print('='*60, flush=True)

    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: non-zero exit status. OSError: the interpreter
        # could not be started. Either way, report it and let the caller
        # continue with the remaining scripts.
        print(f"Error running {script_path}: {e}")
        return False
    return True
|
||||
|
||||
def main():
    """Export CSV files for every configured account.

    Parses ``--output-dir`` (default ``<project_root>/output/csv``), creates
    that directory, then runs each account's processing script through
    ``run_script`` for every account whose input directory exists and is not
    empty. Finally prints a summary and lists the ``.csv`` files present in
    the output directory with their sizes.
    """
    # Resolve project locations once, relative to this file.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    data_dir = os.path.join(project_root, 'data/pdf')
    raw_csv_dir = os.path.join(project_root, 'data/raw_csv')

    parser = argparse.ArgumentParser(description='Process all account statements and output CSV files')
    parser.add_argument('--output-dir', default=os.path.join(project_root, 'output/csv'),
                        help='Directory to save CSV output files')
    args = parser.parse_args()

    # Create output directory if it doesn't exist
    os.makedirs(args.output_dir, exist_ok=True)

    separator = '=' * 60
    print(f"\n{separator}")
    print("All Account Statements CSV Export")
    print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Output Directory: {os.path.abspath(args.output_dir)}")
    print(separator)

    # Account types with their processing script and input directory.
    accounts = [
        {
            'name': 'Boursobank',
            'script': os.path.join(script_dir, 'process_bourso.py'),
            'data_dir': os.path.join(data_dir, 'boursobank'),
            'use_csv_dir': False,
        },
        {
            'name': 'American Express',
            'script': os.path.join(script_dir, 'process_amex.py'),
            'data_dir': os.path.join(data_dir, 'american_express'),
            'use_csv_dir': False,
        },
        {
            'name': 'Monabanq',
            'script': os.path.join(script_dir, 'process_monabanq.py'),
            'data_dir': os.path.join(data_dir, 'monabanq'),
            'use_csv_dir': False,
        },
        {
            'name': 'Revolut',
            'script': os.path.join(script_dir, 'process_expenses.py'),
            'data_dir': raw_csv_dir,  # Revolut uses CSV input
            'use_csv_dir': True,
        },
        {
            'name': 'SNCF',
            'script': os.path.join(script_dir, 'process_sncf.py'),
            'data_dir': os.path.join(data_dir, '1-sncf'),
            'use_csv_dir': False,
        },
        {
            'name': 'La Poste',
            'script': os.path.join(script_dir, 'process_laposte.py'),
            'data_dir': os.path.join(data_dir, '2-la.poste'),
            'use_csv_dir': False,
        },
    ]

    # Process each account
    success_count = 0
    total_accounts = len(accounts)

    for account in accounts:
        input_dir = account['data_dir']

        # isdir (not exists): a regular file at this path would otherwise
        # reach os.listdir() below and raise NotADirectoryError.
        if not os.path.isdir(input_dir):
            print(f"\nWarning: Directory not found for {account['name']}: {input_dir}")
            continue

        # Skip if directory is empty
        if not os.listdir(input_dir):
            print(f"\nSkipping {account['name']}: No files found in {input_dir}")
            continue

        # Run the processing script with appropriate parameter name
        if run_script(account['script'], input_dir, args.output_dir, account['use_csv_dir']):
            success_count += 1

    # Print summary. The total counts every configured account, including
    # those skipped above.
    print(f"\n{separator}")
    print(f"Processing Complete: {success_count}/{total_accounts} accounts processed successfully")
    print(f"CSV files have been saved to: {os.path.abspath(args.output_dir)}")
    print(separator)

    # List the CSV files present in the output directory. The directory was
    # created above, but a child script could have removed it.
    if os.path.isdir(args.output_dir):
        csv_files = sorted(f for f in os.listdir(args.output_dir) if f.endswith('.csv'))
        if csv_files:
            print("\nGenerated CSV Files:")
            for csv_name in csv_files:
                file_size = os.path.getsize(os.path.join(args.output_dir, csv_name))
                print(f"  - {csv_name} ({file_size:,} bytes)")
|
||||
|
||||
# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user