#!/usr/bin/env python3

import csv
import re
from collections import defaultdict
from datetime import datetime

def clean_amount(amount_str):
    """Convert amount string to float, handling various formats"""
    if not amount_str or amount_str == '':
        return 0.0
    
    # Remove commas and spaces, handle negative amounts
    amount_str = str(amount_str).replace(',', '').replace(' ', '').strip()
    
    # Handle empty or non-numeric values
    if amount_str == '' or amount_str == '-' or amount_str == 'nan':
        return 0.0
    
    try:
        return float(amount_str)
    except:
        return 0.0

def categorize_expense(vendor_name, description=""):
    """Categorize expenses based on vendor name and description"""
    vendor = vendor_name.lower()
    desc = description.lower()
    
    # Software/Technology - Atomic Elevator
    if any(x in vendor for x in ['zapier', 'dropbox', 'docusign', 'cleverbridg', 'avast', 'malwarebytes', 'grammarly']):
        return 'Software/Technology', 'Atomic Elevator'
    
    if any(x in vendor for x in ['wisestamp', 'linkedin', 'networksolu']):
        return 'Software/Technology', 'Atomic Elevator'
    
    # Entertainment/Personal - Non-deductible
    if any(x in vendor for x in ['spotify', 'peacocktv', 'hulu', 'disney', 'netflix', 'paramount', 'dollarshave', 'headspace', 'noom']):
        return 'Entertainment/Personal', 'Personal'
    
    # Marketing/Content - Space Monkey Partners  
    if any(x in vendor for x in ['shutterstoc', 'canva', 'constant contact']):
        return 'Marketing/Content', 'Space Monkey Partners'
    
    # Travel/Accommodation - Could be either business
    if any(x in vendor for x in ['airbnb', 'uber', 'starbucks']):
        return 'Travel', 'Shared'
    
    # Insurance - Shared business expense
    if any(x in vendor for x in ['first insurance', 'state farm']):
        return 'Insurance', 'Shared'
    
    # Utilities/Infrastructure - Shared
    if any(x in vendor for x in ['spectrum', 'hostgator']):
        return 'Office/Utilities', 'Shared'
    
    # Payroll/Employee costs - Shared
    if any(x in vendor for x in ['gusto', 'guideline']):
        return 'Payroll/Benefits', 'Shared'
    
    # Professional services
    if any(x in vendor for x in ['innago', 'ssbtrustops', 'montana secretar']):
        return 'Professional Services', 'Shared'
    
    # Credit Card Payments (not actual expenses)
    if any(x in vendor for x in ['amex epayment', 'capital one', 'jpmorgan chase', 'applecard']):
        return 'Credit Card Payment', 'Transfer'
    
    # Business Revenue/Transfers (not expenses)
    if any(x in vendor for x in ['space monkey par', 'atomic elevator', 'cornerstone', 'deposit', 'mercury', 'red oxx', 'mobile capture', 'tremendous']):
        return 'Business Revenue/Transfer', 'Transfer'
    
    # Divvy credit card expenses (these are actual business expenses)
    if 'divvy' in vendor:
        return 'Business Credit Card', 'Shared'
    
    # Service charges
    if 'service charge' in vendor:
        return 'Banking Fees', 'Shared'
    
    # Items that need manual review
    if any(x in vendor for x in ['medium', 'samsung', 'apple.com', 'affirm', 'wifionboard', 'townlapoint']):
        return 'Review Needed', 'Review Needed'
    
    # Default category for unrecognized vendors
    return 'Other', 'Review Needed'

def process_bank_statement():
    """Process the bank statement CSV file"""
    csv_file = '/home/ubuntu/.openclaw/media/inbound/file_293---5c15fc2d-5f20-4d35-972b-6da60ad73643.csv'
    
    transactions = []
    
    with open(csv_file, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        
        for row in reader:
            # Skip empty rows
            if not any(row.values()):
                continue
                
            # Extract key fields
            date_str = row.get('Date', '')
            debit = clean_amount(row.get('Debit', ''))
            credit = clean_amount(row.get('Credit', ''))
            description = row.get('Description', '')
            
            # Parse date
            try:
                date = datetime.strptime(date_str, '%m/%d/%Y')
            except:
                continue
            
            # Process both debits (expenses) and credits (income)
            if debit > 0:
                category, business = categorize_expense(description, description)
                
                transactions.append({
                    'date': date,
                    'amount': debit,
                    'type': 'Expense',
                    'vendor': description,
                    'category': category,
                    'business': business,
                    'source': 'Bank Statement'
                })
            elif credit > 0:
                # Track income/revenue for context
                transactions.append({
                    'date': date,
                    'amount': credit,
                    'type': 'Revenue',
                    'vendor': description,
                    'category': 'Revenue',
                    'business': 'Revenue',
                    'source': 'Bank Statement'
                })
    
    return transactions

def generate_report(transactions):
    """Generate comprehensive expense analysis report"""
    
    # Separate expenses from revenue/transfers
    expenses = [t for t in transactions if t['type'] == 'Expense' and t['category'] not in ['Credit Card Payment', 'Business Revenue/Transfer']]
    revenue = [t for t in transactions if t['type'] == 'Revenue']
    
    total_expenses = sum(t['amount'] for t in expenses)
    total_revenue = sum(t['amount'] for t in revenue)
    
    # Category breakdown
    category_totals = defaultdict(float)
    for t in expenses:
        category_totals[t['category']] += t['amount']
    
    # Business entity breakdown
    business_totals = defaultdict(float)
    for t in expenses:
        business_totals[t['business']] += t['amount']
    
    # Vendor breakdown
    vendor_totals = defaultdict(float)
    for t in expenses:
        vendor_totals[t['vendor']] += t['amount']
    
    # Monthly breakdown
    monthly_expenses = defaultdict(float)
    monthly_revenue = defaultdict(float)
    for t in expenses:
        month_key = t['date'].strftime('%Y-%m')
        monthly_expenses[month_key] += t['amount']
    
    for t in revenue:
        month_key = t['date'].strftime('%Y-%m')
        monthly_revenue[month_key] += t['amount']
    
    # Generate report
    report = f"""# COMPREHENSIVE EXPENSE ANALYSIS - 2025
## Keith Lauver | Complete Financial Overview

---

## 📊 EXECUTIVE SUMMARY

**Total Business Expenses:** ${total_expenses:,.2f}
**Total Business Revenue:** ${total_revenue:,.2f}
**Net Cash Flow:** ${total_revenue - total_expenses:,.2f}

---

## 💰 EXPENSE BREAKDOWN BY CATEGORY

"""
    
    for category, amount in sorted(category_totals.items(), key=lambda x: x[1], reverse=True):
        percentage = (amount / total_expenses) * 100 if total_expenses > 0 else 0
        report += f"**{category}:** ${amount:,.2f} ({percentage:.1f}%)\n"
    
    report += f"\n---\n\n## 🏢 BUSINESS ENTITY ALLOCATION\n\n"
    
    for business, amount in sorted(business_totals.items(), key=lambda x: x[1], reverse=True):
        percentage = (amount / total_expenses) * 100 if total_expenses > 0 else 0
        report += f"**{business}:** ${amount:,.2f} ({percentage:.1f}%)\n"
    
    # Top vendors
    report += f"\n---\n\n## 🎯 TOP EXPENSE VENDORS\n\n"
    
    top_vendors = sorted(vendor_totals.items(), key=lambda x: x[1], reverse=True)[:20]
    for vendor, amount in top_vendors:
        vendor_display = vendor[:60] + "..." if len(vendor) > 60 else vendor
        report += f"**${amount:,.2f}** - {vendor_display}\n"
    
    # Items requiring review
    review_items = [t for t in expenses if t['business'] == 'Review Needed']
    if review_items:
        review_total = sum(t['amount'] for t in review_items)
        report += f"\n---\n\n## ⚠️ ITEMS REQUIRING MANUAL CATEGORIZATION\n\n"
        report += f"**Total requiring review:** ${review_total:,.2f} ({(review_total/total_expenses)*100:.1f}% of expenses)\n\n"
        
        review_vendors = defaultdict(float)
        for t in review_items:
            review_vendors[t['vendor']] += t['amount']
        
        for vendor, amount in sorted(review_vendors.items(), key=lambda x: x[1], reverse=True):
            report += f"**${amount:,.2f}** - {vendor}\n"
    
    # Monthly analysis
    report += f"\n---\n\n## 📅 MONTHLY CASH FLOW ANALYSIS\n\n"
    
    all_months = sorted(set(list(monthly_expenses.keys()) + list(monthly_revenue.keys())))
    
    for month in all_months:
        month_name = datetime.strptime(month, '%Y-%m').strftime('%B %Y')
        exp = monthly_expenses.get(month, 0)
        rev = monthly_revenue.get(month, 0)
        net = rev - exp
        
        report += f"**{month_name}:**\n"
        report += f"  - Revenue: ${rev:,.2f}\n"
        report += f"  - Expenses: ${exp:,.2f}\n"
        report += f"  - Net: ${net:,.2f}\n\n"
    
    # Tax-focused recommendations
    report += f"\n---\n\n## 🧾 TAX PREPARATION INSIGHTS\n\n"
    
    atomic_expenses = sum(t['amount'] for t in expenses if t['business'] == 'Atomic Elevator')
    smp_expenses = sum(t['amount'] for t in expenses if t['business'] == 'Space Monkey Partners')
    personal_expenses = sum(t['amount'] for t in expenses if t['business'] == 'Personal')
    shared_expenses = sum(t['amount'] for t in expenses if t['business'] == 'Shared')
    
    report += f"**Atomic Elevator Deductions:** ${atomic_expenses:,.2f}\n"
    report += f"**Space Monkey Partners Deductions:** ${smp_expenses:,.2f}\n"
    report += f"**Shared Business Expenses (50/50):** ${shared_expenses:,.2f}\n"
    report += f"**Personal (Non-Deductible):** ${personal_expenses:,.2f}\n\n"
    
    report += f"**Recommended Actions:**\n"
    if review_total > 0:
        report += f"1. Categorize ${review_total:,.2f} in uncategorized expenses\n"
    report += f"2. Move personal subscriptions (${personal_expenses:,.2f}) to personal payment methods\n"
    report += f"3. Document business purpose for all travel expenses\n"
    report += f"4. Implement monthly expense review process\n\n"
    
    return report

def export_csv(transactions):
    """Export transactions to CSV for further analysis"""
    csv_file = 'Complete_Business_Expenses_2025.csv'
    
    with open(csv_file, 'w', newline='', encoding='utf-8') as f:
        fieldnames = ['date', 'amount', 'type', 'vendor', 'category', 'business', 'source']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        
        writer.writeheader()
        for t in transactions:
            writer.writerow({
                'date': t['date'].strftime('%Y-%m-%d'),
                'amount': t['amount'],
                'type': t['type'],
                'vendor': t['vendor'],
                'category': t['category'],
                'business': t['business'],
                'source': t['source']
            })
    
    print(f"CSV export saved to {csv_file}")

def main():
    print("Processing comprehensive bank statement analysis...")
    
    # Process bank statement
    transactions = process_bank_statement()
    print(f"Processed {len(transactions)} total transactions")
    
    # Generate analysis report
    report = generate_report(transactions)
    
    # Save report
    report_file = 'Complete_Financial_Analysis_2025.md'
    with open(report_file, 'w', encoding='utf-8') as f:
        f.write(report)
    
    print(f"Analysis complete! Report saved to {report_file}")
    
    # Export CSV
    export_csv(transactions)
    
    print("\nAnalysis Summary:")
    expenses = [t for t in transactions if t['type'] == 'Expense' and t['category'] not in ['Credit Card Payment', 'Business Revenue/Transfer']]
    revenue = [t for t in transactions if t['type'] == 'Revenue']
    
    print(f"  Total Expenses: ${sum(t['amount'] for t in expenses):,.2f}")
    print(f"  Total Revenue: ${sum(t['amount'] for t in revenue):,.2f}")
    print(f"  Net Cash Flow: ${sum(t['amount'] for t in revenue) - sum(t['amount'] for t in expenses):,.2f}")

if __name__ == "__main__":
    main()