#!/usr/bin/env python3

import csv
from collections import defaultdict
from datetime import datetime
import re

def clean_amount(amount_str):
    """Convert amount string to float, handling various formats"""
    if not amount_str or amount_str == '':
        return 0.0
    
    # Remove commas and spaces, handle negative amounts
    amount_str = str(amount_str).replace(',', '').replace(' ', '').replace("'", "").strip()
    
    # Handle empty or non-numeric values
    if amount_str == '' or amount_str == '-' or amount_str == 'nan':
        return 0.0
    
    try:
        return abs(float(amount_str))  # Use absolute value for consistency
    except:
        return 0.0

def categorize_expense_comprehensive(vendor_name, description="", amount=0):
    """Enhanced categorization for complete expense analysis"""
    vendor = vendor_name.lower()
    desc = description.lower()
    
    # Software/Technology - Atomic Elevator
    software_keywords = ['zapier', 'dropbox', 'docusign', 'cleverbridg', 'avast', 'malwarebytes', 
                        'grammarly', 'wisestamp', 'linkedin', 'networksolu', 'github', 'aws',
                        'microsoft', 'adobe', 'landbot', 'openai', 'anthropic']
    if any(x in vendor for x in software_keywords):
        return 'Software/Technology', 'Atomic Elevator'
    
    # Apple purchases - could be business or personal based on amount
    if 'apple' in vendor:
        if amount > 100:  # Likely hardware/business
            return 'Software/Technology', 'Atomic Elevator'
        else:  # Likely app subscriptions
            return 'Software/Technology', 'Review Needed'
    
    # Marketing/Creative - Space Monkey Partners
    marketing_keywords = ['shutterstoc', 'canva', 'constant contact', 'mailchimp', 'unsplash',
                         'getty', 'adobe creative', 'figma', 'design', 'marketing']
    if any(x in vendor for x in marketing_keywords):
        return 'Marketing/Content', 'Space Monkey Partners'
    
    # Entertainment/Personal - Non-deductible
    personal_keywords = ['spotify', 'peacocktv', 'hulu', 'disney', 'netflix', 'paramount', 
                        'dollarshave', 'headspace', 'noom', 'gym', 'fitness', 'barber']
    if any(x in vendor for x in personal_keywords):
        return 'Entertainment/Personal', 'Personal'
    
    # Travel/Business - Mixed
    travel_keywords = ['airbnb', 'uber', 'lyft', 'starbucks', 'hotel', 'airlines', 'aviation',
                      'tst*', 'pollard hotel', 'bogarts', 'lowe aviation']
    if any(x in vendor for x in travel_keywords):
        return 'Travel', 'Review Needed'  # Needs business purpose documentation
    
    # Food/Meals - Could be business or personal
    food_keywords = ['restaurant', 'cafe', 'coffee', 'food', 'dining', 'sq *']
    if any(x in vendor for x in food_keywords):
        if amount > 50:  # Likely business meal
            return 'Meals/Entertainment', 'Review Needed'
        else:  # Likely personal
            return 'Meals/Entertainment', 'Personal'
    
    # Professional Services
    professional_keywords = ['law', 'legal', 'accounting', 'consultant', 'professional',
                           'innago', 'ssbtrustops', 'montana secretar']
    if any(x in vendor for x in professional_keywords):
        return 'Professional Services', 'Shared'
    
    # Insurance
    if any(x in vendor for x in ['insurance', 'state farm']):
        return 'Insurance', 'Shared'
    
    # Utilities/Infrastructure
    utilities_keywords = ['spectrum', 'hostgator', 'internet', 'phone', 'wireless']
    if any(x in vendor for x in utilities_keywords):
        return 'Office/Utilities', 'Shared'
    
    # Payroll/Employee
    if any(x in vendor for x in ['gusto', 'guideline', 'payroll']):
        return 'Payroll/Benefits', 'Shared'
    
    # Banking/Financial
    banking_keywords = ['service charge', 'bank fee', 'interest', 'finance charge']
    if any(x in vendor for x in banking_keywords):
        return 'Banking Fees', 'Shared'
    
    # Items that need manual review
    review_keywords = ['medium', 'samsung', 'affirm', 'wifionboard', 'townlapoint', 'unknown']
    if any(x in vendor for x in review_keywords):
        return 'Review Needed', 'Review Needed'
    
    # Default category for unrecognized vendors
    return 'Other', 'Review Needed'

def process_credit_card_csv(csv_file, card_name):
    """Process credit card CSV file"""
    transactions = []
    
    try:
        with open(csv_file, 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            
            for row in reader:
                # Skip header or empty rows
                if not row.get('Amount') or row.get('Amount') in ['Amount', '']:
                    continue
                
                # Extract key fields
                date_str = row.get('Date (UTC)', '')
                amount = clean_amount(row.get('Amount', ''))
                merchant = row.get('Clean Merchant Name', '') or row.get('Merchant', '')
                cardholder = f"{row.get('First Name', '')} {row.get('Last Name', '')}".strip()
                
                # Parse date
                try:
                    date = datetime.strptime(date_str.split()[0], '%Y-%m-%d')
                except:
                    continue
                
                # Skip zero amounts
                if amount <= 0:
                    continue
                
                # Categorize expense
                category, business = categorize_expense_comprehensive(merchant, merchant, amount)
                
                transactions.append({
                    'date': date,
                    'amount': amount,
                    'vendor': merchant,
                    'cardholder': cardholder,
                    'category': category,
                    'business': business,
                    'source': f'Credit Card {card_name}',
                    'card_identifier': card_name
                })
    except Exception as e:
        print(f"Error processing {csv_file}: {e}")
    
    return transactions

def process_bank_statement_enhanced():
    """Process bank statement with enhanced categorization"""
    csv_file = '/home/ubuntu/.openclaw/media/inbound/file_293---5c15fc2d-5f20-4d35-972b-6da60ad73643.csv'
    transactions = []
    
    with open(csv_file, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        
        for row in reader:
            if not any(row.values()):
                continue
                
            date_str = row.get('Date', '')
            debit = clean_amount(row.get('Debit', ''))
            description = row.get('Description', '')
            
            try:
                date = datetime.strptime(date_str, '%m/%d/%Y')
            except:
                continue
            
            # Only process debits (expenses) and exclude credit card payments
            if debit > 0 and not any(x in description.lower() for x in 
                ['amex epayment', 'capital one crcardpmt', 'applecard gsbank', 'jpmorgan chase']):
                
                category, business = categorize_expense_comprehensive(description, description, debit)
                
                # Skip internal transfers for expense analysis
                if category not in ['Credit Card Payment', 'Business Revenue/Transfer']:
                    transactions.append({
                        'date': date,
                        'amount': debit,
                        'vendor': description,
                        'cardholder': 'Bank Transfer',
                        'category': category,
                        'business': business,
                        'source': 'Bank Statement',
                        'card_identifier': 'Bank'
                    })
    
    return transactions

def generate_comprehensive_report(all_transactions):
    """Generate complete financial analysis across all sources"""
    
    # Filter to actual expenses (not transfers)
    expenses = [t for t in all_transactions if t['category'] not in 
              ['Credit Card Payment', 'Business Revenue/Transfer']]
    
    total_expenses = sum(t['amount'] for t in expenses)
    
    # Breakdown by source
    source_totals = defaultdict(float)
    for t in expenses:
        source_totals[t['source']] += t['amount']
    
    # Category breakdown
    category_totals = defaultdict(float)
    for t in expenses:
        category_totals[t['category']] += t['amount']
    
    # Business entity breakdown
    business_totals = defaultdict(float)
    for t in expenses:
        business_totals[t['business']] += t['amount']
    
    # Cardholder breakdown
    cardholder_totals = defaultdict(float)
    for t in expenses:
        cardholder_totals[t['cardholder']] += t['amount']
    
    # Monthly breakdown
    monthly_totals = defaultdict(float)
    for t in expenses:
        month_key = t['date'].strftime('%Y-%m')
        monthly_totals[month_key] += t['amount']
    
    # Generate comprehensive report
    report = f"""# COMPLETE FINANCIAL ANALYSIS 2025-2026
## Keith Lauver | All Credit Cards + Bank Statement

---

## 🎯 EXECUTIVE SUMMARY

**TOTAL CONSOLIDATED EXPENSES:** ${total_expenses:,.2f}
**Data Sources:** Bank Statement + Credit Card Statements
**Time Period:** 2025 - January 2026
**Total Transactions Analyzed:** {len(expenses):,}

---

## 📊 EXPENSE BREAKDOWN BY SOURCE

"""
    
    for source, amount in sorted(source_totals.items(), key=lambda x: x[1], reverse=True):
        percentage = (amount / total_expenses) * 100 if total_expenses > 0 else 0
        report += f"**{source}:** ${amount:,.2f} ({percentage:.1f}%)\n"
    
    report += f"\n---\n\n## 💰 EXPENSE CATEGORIES\n\n"
    
    for category, amount in sorted(category_totals.items(), key=lambda x: x[1], reverse=True):
        percentage = (amount / total_expenses) * 100 if total_expenses > 0 else 0
        report += f"**{category}:** ${amount:,.2f} ({percentage:.1f}%)\n"
    
    report += f"\n---\n\n## 🏢 BUSINESS ENTITY ALLOCATION\n\n"
    
    for business, amount in sorted(business_totals.items(), key=lambda x: x[1], reverse=True):
        percentage = (amount / total_expenses) * 100 if total_expenses > 0 else 0
        report += f"**{business}:** ${amount:,.2f} ({percentage:.1f}%)\n"
    
    report += f"\n---\n\n## 👥 SPENDING BY CARDHOLDER\n\n"
    
    for cardholder, amount in sorted(cardholder_totals.items(), key=lambda x: x[1], reverse=True):
        percentage = (amount / total_expenses) * 100 if total_expenses > 0 else 0
        report += f"**{cardholder}:** ${amount:,.2f} ({percentage:.1f}%)\n"
    
    # Top vendors across all sources
    vendor_totals = defaultdict(float)
    for t in expenses:
        vendor_totals[t['vendor']] += t['amount']
    
    report += f"\n---\n\n## 🎯 TOP VENDORS (ALL SOURCES)\n\n"
    
    top_vendors = sorted(vendor_totals.items(), key=lambda x: x[1], reverse=True)[:25]
    for vendor, amount in top_vendors:
        vendor_display = vendor[:70] + "..." if len(vendor) > 70 else vendor
        report += f"**${amount:,.2f}** - {vendor_display}\n"
    
    # Items requiring review
    review_items = [t for t in expenses if t['business'] == 'Review Needed']
    if review_items:
        review_total = sum(t['amount'] for t in review_items)
        report += f"\n---\n\n## ⚠️ ITEMS REQUIRING CATEGORIZATION\n\n"
        report += f"**Total requiring review:** ${review_total:,.2f} ({(review_total/total_expenses)*100:.1f}% of total expenses)\n\n"
        
        review_by_source = defaultdict(float)
        for t in review_items:
            review_by_source[t['source']] += t['amount']
        
        for source, amount in sorted(review_by_source.items(), key=lambda x: x[1], reverse=True):
            report += f"**{source}:** ${amount:,.2f}\n"
        
        report += f"\n**Top items needing categorization:**\n\n"
        review_vendors = defaultdict(float)
        for t in review_items:
            review_vendors[t['vendor']] += t['amount']
        
        for vendor, amount in sorted(review_vendors.items(), key=lambda x: x[1], reverse=True)[:15]:
            report += f"**${amount:,.2f}** - {vendor}\n"
    
    # Tax summary
    atomic_total = business_totals.get('Atomic Elevator', 0)
    smp_total = business_totals.get('Space Monkey Partners', 0)
    shared_total = business_totals.get('Shared', 0)
    personal_total = business_totals.get('Personal', 0)
    review_total = business_totals.get('Review Needed', 0)
    
    report += f"\n---\n\n## 🧾 TAX PREPARATION SUMMARY\n\n"
    report += f"**Atomic Elevator Deductions:** ${atomic_total:,.2f}\n"
    report += f"**Space Monkey Partners Deductions:** ${smp_total:,.2f}\n"
    report += f"**Shared Business Expenses (Split 50/50):** ${shared_total:,.2f}\n"
    report += f"**Personal (Non-Deductible):** ${personal_total:,.2f}\n"
    report += f"**Needs Categorization:** ${review_total:,.2f}\n\n"
    
    # Estimated tax deductions
    atomic_allocation = atomic_total + (shared_total * 0.5)
    smp_allocation = smp_total + (shared_total * 0.5)
    
    report += f"**ESTIMATED TAX DEDUCTIONS:**\n"
    report += f"- **Atomic Elevator Total:** ${atomic_allocation:,.2f}\n"
    report += f"- **Space Monkey Partners Total:** ${smp_allocation:,.2f}\n"
    report += f"- **Total Business Deductions:** ${atomic_allocation + smp_allocation:,.2f}\n\n"
    
    # Key recommendations
    report += f"**IMMEDIATE ACTION ITEMS:**\n"
    report += f"1. **Categorize ${review_total:,.2f}** in uncategorized expenses\n"
    report += f"2. **Move personal expenses** (${personal_total:,.2f}) to personal payment methods\n"
    report += f"3. **Document business purpose** for travel and meal expenses\n"
    report += f"4. **Set up separate cards** for each business entity\n"
    report += f"5. **Implement monthly review** process for ongoing categorization\n\n"
    
    return report

def export_consolidated_csv(all_transactions):
    """Export all transactions to a single CSV file"""
    csv_file = 'COMPLETE_CONSOLIDATED_EXPENSES_2025-2026.csv'
    
    with open(csv_file, 'w', newline='', encoding='utf-8') as f:
        fieldnames = ['date', 'amount', 'vendor', 'cardholder', 'category', 'business', 'source', 'card_identifier']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        
        writer.writeheader()
        for t in sorted(all_transactions, key=lambda x: x['date'], reverse=True):
            writer.writerow({
                'date': t['date'].strftime('%Y-%m-%d'),
                'amount': f"{t['amount']:.2f}",
                'vendor': t['vendor'],
                'cardholder': t['cardholder'],
                'category': t['category'],
                'business': t['business'],
                'source': t['source'],
                'card_identifier': t['card_identifier']
            })
    
    print(f"Consolidated CSV exported to {csv_file}")

def main():
    print("Processing complete financial consolidation...")
    
    all_transactions = []
    
    # Process bank statement
    print("Processing bank statement...")
    bank_transactions = process_bank_statement_enhanced()
    all_transactions.extend(bank_transactions)
    print(f"Bank: {len(bank_transactions)} transactions")
    
    # Process credit card 1
    print("Processing credit card 1...")
    cc1_transactions = process_credit_card_csv('credit_card_1.csv', '4298')
    all_transactions.extend(cc1_transactions)
    print(f"Credit Card 1: {len(cc1_transactions)} transactions")
    
    # Process credit card 2
    print("Processing credit card 2...")
    cc2_transactions = process_credit_card_csv('credit_card_2.csv', 'Mixed')
    all_transactions.extend(cc2_transactions)
    print(f"Credit Card 2: {len(cc2_transactions)} transactions")
    
    print(f"\nTotal transactions processed: {len(all_transactions)}")
    
    # Generate comprehensive report
    report = generate_comprehensive_report(all_transactions)
    
    # Save report
    report_file = 'COMPLETE_FINANCIAL_CONSOLIDATION_2025-2026.md'
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report)
    
    print(f"Complete analysis saved to {report_file}")
    
    # Export consolidated CSV
    export_consolidated_csv(all_transactions)
    
    # Summary stats
    expenses = [t for t in all_transactions if t['category'] not in 
              ['Credit Card Payment', 'Business Revenue/Transfer']]
    total_expenses = sum(t['amount'] for t in expenses)
    
    print(f"\n=== CONSOLIDATION SUMMARY ===")
    print(f"Total Consolidated Expenses: ${total_expenses:,.2f}")
    print(f"Bank Statement: ${sum(t['amount'] for t in bank_transactions):,.2f}")
    print(f"Credit Card 1: ${sum(t['amount'] for t in cc1_transactions):,.2f}")  
    print(f"Credit Card 2: ${sum(t['amount'] for t in cc2_transactions):,.2f}")

if __name__ == "__main__":
    main()