#!/usr/bin/env python3

import csv
from collections import defaultdict
from datetime import datetime

def clean_amount(amount_str):
    """Convert amount string to float, handling negative amounts"""
    if not amount_str or amount_str == '':
        return 0.0
    
    amount_str = str(amount_str).replace(',', '').replace(' ', '').strip()
    
    if amount_str == '' or amount_str == '-' or amount_str == 'nan':
        return 0.0
    
    try:
        return abs(float(amount_str))  # Use absolute value
    except:
        return 0.0

def categorize_credit_card_expense(merchant, amount=0, cardholder=""):
    """Categorize credit card expenses"""
    vendor = merchant.lower()
    
    # Software/Technology - Atomic Elevator
    if any(x in vendor for x in ['apple', 'landbot', 'stamps.com', 'github', 'aws', 'microsoft', 
                                'adobe', 'dropbox', 'docusign', 'zapier', 'grammarly', 'linkedin']):
        return 'Software/Technology', 'Atomic Elevator'
    
    # Marketing/Creative - Space Monkey Partners  
    if any(x in vendor for x in ['canva', 'facebook', 'facebk']):
        return 'Marketing/Content', 'Space Monkey Partners'
    
    # Travel/Business
    if any(x in vendor for x in ['aviation', 'airline', 'hotel', 'pollard', 'tst*', 'uber', 'lyft']):
        return 'Travel', 'Shared'
    
    # Food/Meals
    if any(x in vendor for x in ['bogarts', 'restaurant', 'cafe', 'starbucks', 'sq *']):
        if amount > 50:
            return 'Meals/Entertainment', 'Shared'  # Business meals
        else:
            return 'Meals/Entertainment', 'Personal'  # Personal meals
    
    # Personal services
    if any(x in vendor for x in ['barber', 'salon', 'spa']):
        return 'Personal Services', 'Personal'
    
    # Employee expenses (Alayna)
    if 'alayna' in cardholder.lower():
        if any(x in vendor for x in ['canva', 'facebook']):
            return 'Marketing/Content', 'Space Monkey Partners'
        else:
            return 'Employee Expenses', 'Shared'
    
    # Default
    return 'Other', 'Review Needed'

def process_corrected_credit_card_csv(csv_file, card_name):
    """Process credit card CSV with CORRECTED column mapping"""
    transactions = []
    
    try:
        with open(csv_file, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            
            for row in reader:
                # CORRECTED COLUMN MAPPING:
                date_str = row.get('Date (UTC)', '')
                # The amount is in "Clean Merchant Name" column (due to CSV conversion issue)
                amount = clean_amount(row.get('Clean Merchant Name', ''))
                # The merchant name is in "Merchant" column  
                merchant = row.get('Merchant', '')
                cardholder = f"{row.get('First Name', '')} {row.get('Last Name', '')}".strip()
                card_last_4 = row.get('Card Last 4', '').replace("'", "")
                
                # Skip zero amounts or empty merchants
                if amount <= 0 or not merchant:
                    continue
                
                # Parse date
                try:
                    date = datetime.strptime(date_str.split()[0], '%Y-%m-%d')
                except:
                    continue
                
                # Filter to 2025-2026 data
                if date.year < 2025:
                    continue
                
                # Categorize expense
                category, business = categorize_credit_card_expense(merchant, amount, cardholder)
                
                transactions.append({
                    'date': date,
                    'amount': amount,
                    'vendor': merchant,
                    'cardholder': cardholder,
                    'category': category,
                    'business': business,
                    'source': f'Credit Card {card_last_4}',
                    'card_identifier': card_last_4
                })
                
    except Exception as e:
        print(f"Error processing {csv_file}: {e}")
        import traceback
        traceback.print_exc()
    
    return transactions

def process_bank_for_final():
    """Process bank statement"""
    csv_file = '/home/ubuntu/.openclaw/media/inbound/file_293---5c15fc2d-5f20-4d35-972b-6da60ad73643.csv'
    transactions = []
    
    with open(csv_file, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        
        for row in reader:
            if not any(row.values()):
                continue
                
            date_str = row.get('Date', '')
            debit = clean_amount(row.get('Debit', ''))
            description = row.get('Description', '')
            
            try:
                date = datetime.strptime(date_str, '%m/%d/%Y')
            except:
                continue
            
            # Only actual business expenses (exclude credit card payments)
            if (debit > 0 and 
                not any(x in description.lower() for x in 
                ['amex epayment', 'capital one crcardpmt', 'applecard gsbank', 
                 'jpmorgan chase', 'internet banking transfer', 'deposit', 
                 'mercury', 'cornerstone', 'space monkey par', 'atomic elevator',
                 'mobile capture', 'check #'])):
                
                # Categorization for bank expenses
                if 'gusto' in description.lower():
                    category, business = 'Payroll/Benefits', 'Shared'
                elif 'divvy' in description.lower():
                    category, business = 'Business Credit Card', 'Shared'
                elif any(x in description.lower() for x in ['spectrum', 'insurance']):
                    category, business = 'Office/Utilities', 'Shared'
                elif any(x in description.lower() for x in ['zapier', 'dropbox', 'netflix']):
                    category, business = 'Software/Technology', 'Atomic Elevator'
                else:
                    category, business = 'Other', 'Review Needed'
                
                transactions.append({
                    'date': date,
                    'amount': debit,
                    'vendor': description,
                    'cardholder': 'Bank Transfer',
                    'category': category,
                    'business': business,
                    'source': 'Bank Statement',
                    'card_identifier': 'Bank'
                })
    
    return transactions

def main():
    print("Processing CORRECTED final consolidated financial analysis...")
    
    all_transactions = []
    
    # Process bank statement
    print("\\n1. Processing bank statement...")
    bank_transactions = process_bank_for_final()
    all_transactions.extend(bank_transactions)
    print(f"   Bank transactions: {len(bank_transactions)}")
    
    # Process credit card 1 with corrected mapping
    print("\\n2. Processing credit card 1 (CORRECTED mapping)...")
    cc1_transactions = process_corrected_credit_card_csv('credit_card_1.csv', '4298')
    all_transactions.extend(cc1_transactions)
    print(f"   Credit card 1 transactions: {len(cc1_transactions)}")
    
    # Process credit card 2 with corrected mapping  
    print("\\n3. Processing credit card 2 (CORRECTED mapping)...")
    cc2_transactions = process_corrected_credit_card_csv('credit_card_2.csv', 'Mixed')
    all_transactions.extend(cc2_transactions)
    print(f"   Credit card 2 transactions: {len(cc2_transactions)}")
    
    # Show some sample credit card transactions to verify
    if cc1_transactions:
        print("\\n📋 SAMPLE CREDIT CARD TRANSACTIONS (verification):")
        for i, t in enumerate(cc1_transactions[:5]):
            print(f"  {t['date'].strftime('%Y-%m-%d')}: ${t['amount']:.2f} - {t['vendor']} ({t['cardholder']})")
    
    # Calculate totals
    total_expenses = sum(t['amount'] for t in all_transactions)
    
    # Breakdowns
    source_totals = defaultdict(float)
    for t in all_transactions:
        source_totals[t['source']] += t['amount']
    
    business_totals = defaultdict(float)
    for t in all_transactions:
        business_totals[t['business']] += t['amount']
    
    category_totals = defaultdict(float)
    for t in all_transactions:
        category_totals[t['category']] += t['amount']
    
    print("\\n" + "="*80)
    print("🎯 COMPLETE CONSOLIDATED FINANCIAL ANALYSIS")
    print("="*80)
    print(f"\\nTOTAL CONSOLIDATED EXPENSES: ${total_expenses:,.2f}")
    print(f"TOTAL TRANSACTIONS: {len(all_transactions):,}")
    
    print(f"\\n📊 BY SOURCE:")
    for source in sorted(source_totals.keys(), key=lambda x: source_totals[x], reverse=True):
        amount = source_totals[source]
        pct = (amount / total_expenses) * 100 if total_expenses > 0 else 0
        print(f"  {source}: ${amount:,.2f} ({pct:.1f}%)")
    
    print(f"\\n🏢 BY BUSINESS ENTITY:")
    for business in sorted(business_totals.keys(), key=lambda x: business_totals[x], reverse=True):
        amount = business_totals[business]
        pct = (amount / total_expenses) * 100 if total_expenses > 0 else 0
        print(f"  {business}: ${amount:,.2f} ({pct:.1f}%)")
    
    print(f"\\n📂 BY CATEGORY:")
    for category in sorted(category_totals.keys(), key=lambda x: category_totals[x], reverse=True)[:10]:
        amount = category_totals[category]
        pct = (amount / total_expenses) * 100 if total_expenses > 0 else 0
        print(f"  {category}: ${amount:,.2f} ({pct:.1f}%)")
    
    # Export final consolidated CSV
    csv_file = 'KEITH_FINAL_COMPLETE_EXPENSES_2025-2026.csv'
    with open(csv_file, 'w', newline='', encoding='utf-8') as f:
        fieldnames = ['date', 'amount', 'vendor', 'cardholder', 'category', 'business', 
                     'source', 'card_identifier']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        
        writer.writeheader()
        for t in sorted(all_transactions, key=lambda x: x['date'], reverse=True):
            writer.writerow({
                'date': t['date'].strftime('%Y-%m-%d'),
                'amount': f"{t['amount']:.2f}",
                'vendor': t['vendor'],
                'cardholder': t['cardholder'],
                'category': t['category'],
                'business': t['business'],
                'source': t['source'],
                'card_identifier': t['card_identifier']
            })
    
    print(f"\\n📁 FINAL CONSOLIDATED DATA EXPORTED TO: {csv_file}")
    
    # Tax summary
    atomic_total = business_totals.get('Atomic Elevator', 0)
    smp_total = business_totals.get('Space Monkey Partners', 0)
    shared_total = business_totals.get('Shared', 0)
    personal_total = business_totals.get('Personal', 0)
    review_total = business_totals.get('Review Needed', 0)
    
    print(f"\\n🧾 TAX PREPARATION SUMMARY:")
    print(f"  Atomic Elevator: ${atomic_total:,.2f}")
    print(f"  Space Monkey Partners: ${smp_total:,.2f}")
    print(f"  Shared (50/50 split): ${shared_total:,.2f}")
    print(f"  Personal (non-deductible): ${personal_total:,.2f}")
    print(f"  Needs categorization: ${review_total:,.2f}")
    
    atomic_allocation = atomic_total + (shared_total * 0.5)
    smp_allocation = smp_total + (shared_total * 0.5)
    
    print(f"\\n💰 ESTIMATED TAX DEDUCTIONS:")
    print(f"  Atomic Elevator Total: ${atomic_allocation:,.2f}")
    print(f"  Space Monkey Partners Total: ${smp_allocation:,.2f}")
    print(f"  Combined Business Deductions: ${atomic_allocation + smp_allocation:,.2f}")
    
    return len(all_transactions), total_expenses

if __name__ == "__main__":
    main()