#!/usr/bin/env python3

import csv
from collections import defaultdict
from datetime import datetime
import re

def clean_amount(amount_str):
    """Convert amount string to float, handling negative amounts from credit cards"""
    if not amount_str or amount_str == '':
        return 0.0
    
    # Remove commas and spaces
    amount_str = str(amount_str).replace(',', '').replace(' ', '').replace("'", "").strip()
    
    if amount_str == '' or amount_str == '-' or amount_str == 'nan':
        return 0.0
    
    try:
        return abs(float(amount_str))  # Use absolute value since credit card amounts are negative
    except:
        return 0.0

def categorize_credit_card_expense(merchant, amount=0, cardholder="", category_hint=""):
    """Enhanced categorization for credit card expenses"""
    vendor = merchant.lower()
    hint = category_hint.lower() if category_hint else ""
    
    # Software/Technology - Atomic Elevator
    software_keywords = ['apple.com', 'landbot', 'stamps.com', 'github', 'aws', 'microsoft', 
                        'adobe', 'dropbox', 'docusign', 'zapier', 'grammarly', 'linkedin']
    if any(x in vendor for x in software_keywords):
        return 'Software/Technology', 'Atomic Elevator'
    
    # Marketing/Creative - Space Monkey Partners  
    marketing_keywords = ['canva', 'facebook', 'facebk', 'constant contact', 'mailchimp', 
                         'shutterstoc', 'getty', 'unsplash']
    if any(x in vendor for x in marketing_keywords):
        return 'Marketing/Content', 'Space Monkey Partners'
    
    # Travel/Business
    travel_keywords = ['aviation', 'lowe aviation', 'airline', 'hotel', 'pollard hotel', 
                      'tst*', 'uber', 'lyft', 'airbnb']
    if any(x in vendor for x in travel_keywords):
        return 'Travel', 'Shared'  # Travel can benefit both businesses
    
    # Food/Meals - categorize by amount and context
    food_keywords = ['bogarts', 'restaurant', 'cafe', 'starbucks', 'sq *', 'barber']
    if any(x in vendor for x in food_keywords) or 'food' in hint:
        if amount > 50:  # Likely business meal
            return 'Meals/Entertainment', 'Shared'
        else:  # Likely personal
            return 'Meals/Entertainment', 'Personal'
    
    # Personal services
    personal_keywords = ['barber', 'salon', 'spa', 'gym', 'fitness']
    if any(x in vendor for x in personal_keywords):
        return 'Personal Services', 'Personal'
    
    # Based on cardholder
    if 'alayna' in cardholder.lower():
        # Alayna's expenses are likely business (she's an employee)
        return 'Employee Expenses', 'Shared'
    
    # Default for unrecognized
    return 'Other', 'Review Needed'

def process_credit_card_detailed(csv_file, card_name):
    """Process credit card CSV with full field support"""
    transactions = []
    
    try:
        with open(csv_file, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            
            for row in reader:
                # Extract key fields
                date_str = row.get('Date (UTC)', '')
                amount = clean_amount(row.get('Amount', ''))
                merchant = row.get('Clean Merchant Name', '') or row.get('Merchant', '')
                cardholder = f"{row.get('First Name', '')} {row.get('Last Name', '')}".strip()
                category_hint = row.get('Category', '')
                card_last_4 = row.get('Card Last 4', '').replace("'", "")
                status = row.get('Status', '')
                
                # Skip zero amounts or incomplete transactions that aren't expenses
                if amount <= 0 or not merchant:
                    continue
                
                # Parse date
                try:
                    date = datetime.strptime(date_str.split()[0], '%Y-%m-%d')
                except:
                    continue
                
                # Filter to 2025 and 2026 data
                if date.year < 2025:
                    continue
                
                # Categorize expense
                category, business = categorize_credit_card_expense(
                    merchant, amount, cardholder, category_hint)
                
                transactions.append({
                    'date': date,
                    'amount': amount,
                    'vendor': merchant,
                    'cardholder': cardholder,
                    'category': category,
                    'business': business,
                    'source': f'Credit Card {card_last_4}',
                    'card_identifier': card_last_4,
                    'status': status,
                    'original_category': category_hint
                })
                
    except Exception as e:
        print(f"Error processing {csv_file}: {e}")
    
    return transactions

def process_bank_statement_simple():
    """Simple bank statement processing"""
    csv_file = '/home/ubuntu/.openclaw/media/inbound/file_293---5c15fc2d-5f20-4d35-972b-6da60ad73643.csv'
    transactions = []
    
    with open(csv_file, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        
        for row in reader:
            if not any(row.values()):
                continue
                
            date_str = row.get('Date', '')
            debit = clean_amount(row.get('Debit', ''))
            description = row.get('Description', '')
            
            try:
                date = datetime.strptime(date_str, '%m/%d/%Y')
            except:
                continue
            
            # Only track actual business expenses (exclude transfers and CC payments)
            if (debit > 0 and 
                not any(x in description.lower() for x in 
                ['amex epayment', 'capital one crcardpmt', 'applecard gsbank', 
                 'jpmorgan chase', 'internet banking transfer', 'deposit', 
                 'mercury', 'cornerstone', 'space monkey par', 'atomic elevator',
                 'mobile capture', 'check #'])):
                
                # Quick categorization for bank expenses
                if 'gusto' in description.lower():
                    category, business = 'Payroll/Benefits', 'Shared'
                elif 'divvy' in description.lower():
                    category, business = 'Business Credit Card', 'Shared'
                elif any(x in description.lower() for x in ['spectrum', 'insurance']):
                    category, business = 'Office/Utilities', 'Shared'
                else:
                    category, business = 'Other', 'Review Needed'
                
                transactions.append({
                    'date': date,
                    'amount': debit,
                    'vendor': description,
                    'cardholder': 'Bank Transfer',
                    'category': category,
                    'business': business,
                    'source': 'Bank Statement',
                    'card_identifier': 'Bank',
                    'status': 'Complete',
                    'original_category': ''
                })
    
    return transactions

def generate_quick_summary(all_transactions):
    """Generate summary of consolidated data"""
    
    expenses = [t for t in all_transactions]
    total_expenses = sum(t['amount'] for t in expenses)
    
    # By source
    source_totals = defaultdict(float)
    source_counts = defaultdict(int)
    for t in expenses:
        source_totals[t['source']] += t['amount']
        source_counts[t['source']] += 1
    
    # By business
    business_totals = defaultdict(float)
    for t in expenses:
        business_totals[t['business']] += t['amount']
    
    # By cardholder
    cardholder_totals = defaultdict(float)
    for t in expenses:
        cardholder_totals[t['cardholder']] += t['amount']
    
    # Monthly totals
    monthly_totals = defaultdict(float)
    for t in expenses:
        month = t['date'].strftime('%Y-%m')
        monthly_totals[month] += t['amount']
    
    print("\n" + "="*80)
    print("COMPLETE FINANCIAL CONSOLIDATION SUMMARY")
    print("="*80)
    
    print(f"\nTOTAL CONSOLIDATED EXPENSES: ${total_expenses:,.2f}")
    print(f"TOTAL TRANSACTIONS: {len(expenses):,}")
    
    print(f"\nBY SOURCE:")
    for source in sorted(source_totals.keys()):
        amount = source_totals[source]
        count = source_counts[source]
        pct = (amount / total_expenses) * 100 if total_expenses > 0 else 0
        print(f"  {source}: ${amount:,.2f} ({count:,} transactions, {pct:.1f}%)")
    
    print(f"\nBY BUSINESS ENTITY:")
    for business in sorted(business_totals.keys(), key=lambda x: business_totals[x], reverse=True):
        amount = business_totals[business]
        pct = (amount / total_expenses) * 100 if total_expenses > 0 else 0
        print(f"  {business}: ${amount:,.2f} ({pct:.1f}%)")
    
    print(f"\nBY CARDHOLDER:")
    for cardholder in sorted(cardholder_totals.keys(), key=lambda x: cardholder_totals[x], reverse=True):
        amount = cardholder_totals[cardholder]
        pct = (amount / total_expenses) * 100 if total_expenses > 0 else 0
        print(f"  {cardholder}: ${amount:,.2f} ({pct:.1f}%)")
    
    # Export to CSV
    csv_file = 'FINAL_CONSOLIDATED_EXPENSES.csv'
    with open(csv_file, 'w', newline='', encoding='utf-8') as f:
        fieldnames = ['date', 'amount', 'vendor', 'cardholder', 'category', 'business', 
                     'source', 'card_identifier', 'status', 'original_category']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        
        writer.writeheader()
        for t in sorted(expenses, key=lambda x: x['date'], reverse=True):
            writer.writerow({
                'date': t['date'].strftime('%Y-%m-%d'),
                'amount': f"{t['amount']:.2f}",
                'vendor': t['vendor'],
                'cardholder': t['cardholder'],
                'category': t['category'],
                'business': t['business'],
                'source': t['source'],
                'card_identifier': t['card_identifier'],
                'status': t['status'],
                'original_category': t['original_category']
            })
    
    print(f"\nConsolidated data exported to: {csv_file}")
    return expenses

def main():
    print("Processing complete financial consolidation with fixed parsing...")
    
    all_transactions = []
    
    # Process bank statement
    print("\n1. Processing bank statement...")
    bank_transactions = process_bank_statement_simple()
    all_transactions.extend(bank_transactions)
    print(f"   Bank transactions: {len(bank_transactions)}")
    
    # Process credit card 1
    print("\n2. Processing credit card 1 (large file)...")
    cc1_transactions = process_credit_card_detailed('credit_card_1.csv', '4298')
    all_transactions.extend(cc1_transactions)
    print(f"   Credit card 1 transactions: {len(cc1_transactions)}")
    
    # Process credit card 2
    print("\n3. Processing credit card 2 (mixed cardholders)...")
    cc2_transactions = process_credit_card_detailed('credit_card_2.csv', 'Mixed')
    all_transactions.extend(cc2_transactions)
    print(f"   Credit card 2 transactions: {len(cc2_transactions)}")
    
    # Generate summary
    expenses = generate_quick_summary(all_transactions)
    
    return expenses

if __name__ == "__main__":
    main()