#!/usr/bin/env python3

import csv
from collections import defaultdict

def apply_smart_categorizations(input_file='UNCATEGORIZED_FOR_REVIEW.csv'):
    """Load uncategorized transactions and suggest a business for each one.

    Each row's vendor name is lower-cased and checked against a table of
    keyword -> business rules; the first keyword (in table order) that occurs
    as a substring of the vendor wins.  Rows matching no keyword are labelled
    'Review Needed'.  Vendors containing 'amazon' override any keyword match:
    amounts above 200 become 'Atomic Elevator', all others 'Review Needed'.

    Args:
        input_file: Path of the CSV file to read.  It must provide 'date',
            'amount' and 'vendor' columns; 'suggested_business' is optional.

    Returns:
        A list of dicts with the keys 'date', 'amount' (float), 'vendor',
        'suggested_business' and 'original_suggestion' (the value of the
        input's 'suggested_business' column, or '' when that column is
        absent).  If the file does not exist, a message is printed and an
        empty list is returned.

    Raises:
        KeyError: If a required column is missing from the file.
        ValueError: If an 'amount' value cannot be parsed as a float.
    """

    # Smart categorization rules based on business knowledge.
    # Order matters: the first keyword found in the vendor name wins.
    categorization_rules = {
        # Space Monkey Partners (Marketing Consultancy)
        'hubspot': 'Space Monkey Partners',
        'storybrand': 'Space Monkey Partners',
        'don miller': 'Space Monkey Partners',
        'fathom video': 'Space Monkey Partners',
        'video': 'Space Monkey Partners',
        'marketing': 'Space Monkey Partners',
        'creative': 'Space Monkey Partners',

        # Atomic Elevator (AI Platform)
        'pilot shop': 'Atomic Elevator',
        'jeppesen': 'Atomic Elevator',
        'aviation': 'Atomic Elevator',
        'aircraft': 'Atomic Elevator',

        # Shared Business Expenses
        'edwards jet center': 'Shared',
        'jet center': 'Shared',
        'verizon': 'Shared',
        'venetian palazzo': 'Shared',
        'hotel': 'Shared',
        'ssbtrustops': 'Shared',
        'payroll': 'Shared',
        'ace hardware': 'Shared',
        'beartooth market': 'Shared',
        'spoke shop': 'Shared',
        'res ': 'Shared',  # Likely restaurant/business meal

        # Personal (Non-Deductible)
        'department of education': 'Personal',
        'education educ': 'Personal',
        'west clinic': 'Personal',
        'candrug': 'Personal',
        'nespresso': 'Personal',
        'divers den': 'Personal',
        'red lodge': 'Personal',  # Local personal expenses
        'the local west': 'Personal',
        'big sky resort': 'Personal',
    }

    transactions = []

    try:
        # newline='' lets the csv module do its own line-ending handling, so
        # line breaks inside quoted fields are preserved exactly.
        with open(input_file, 'r', newline='', encoding='utf-8') as f:
            for row in csv.DictReader(f):
                vendor = row['vendor'].lower()
                # Parse once; the value is needed both for the Amazon rule
                # and for the output record.
                amount = float(row['amount'])

                # First matching keyword wins; default when nothing matches.
                suggested_business = next(
                    (
                        business
                        for keyword, business in categorization_rules.items()
                        if keyword in vendor
                    ),
                    'Review Needed',
                )

                # Special case for Amazon - context needed.  This overrides
                # whatever the keyword rules decided.
                if 'amazon' in vendor:
                    if amount > 200:
                        suggested_business = 'Atomic Elevator'  # Likely business equipment
                    else:
                        suggested_business = 'Review Needed'  # Could be either

                transactions.append({
                    'date': row['date'],
                    'amount': amount,
                    'vendor': row['vendor'],
                    'suggested_business': suggested_business,
                    # Pass-through only, so tolerate files without the column.
                    'original_suggestion': row.get('suggested_business', ''),
                })

    except FileNotFoundError:
        print("Uncategorized file not found")
        return []

    return transactions

def generate_categorization_summary(transactions=None):
    """Print a per-business summary of the categorization recommendations.

    For every business label that has at least one transaction, prints the
    total amount, its share of the overall total, the transaction count and
    the five vendors with the largest summed amounts.

    Args:
        transactions: Optional list of transaction dicts carrying the keys
            'vendor', 'amount' and 'suggested_business'.  When omitted
            (None), the transactions are loaded by calling
            apply_smart_categorizations().

    Returns:
        A defaultdict(float) mapping each suggested business to the sum of
        its transaction amounts.
    """

    if transactions is None:
        transactions = apply_smart_categorizations()

    # Aggregate everything in a single pass over the transactions.
    business_totals = defaultdict(float)
    business_counts = defaultdict(int)
    # business -> vendor -> summed amount, so each vendor is listed once
    # with its full total instead of once per distinct charge.
    vendor_totals = defaultdict(lambda: defaultdict(float))

    for t in transactions:
        business = t['suggested_business']
        business_totals[business] += t['amount']
        business_counts[business] += 1
        vendor_totals[business][t['vendor']] += t['amount']

    print("💡 SMART CATEGORIZATION RECOMMENDATIONS")
    print("=" * 60)

    total_categorized = sum(business_totals.values())

    for business in ['Space Monkey Partners', 'Atomic Elevator', 'Shared', 'Personal', 'Review Needed']:
        if business not in business_totals:
            continue

        amount = business_totals[business]
        count = business_counts[business]
        # Guard against division by zero (and non-positive totals).
        pct = (amount / total_categorized) * 100 if total_categorized > 0 else 0

        print(f"\n🎯 {business.upper()}")
        print(f"   Amount: ${amount:,.2f} ({pct:.1f}%)")
        print(f"   Transactions: {count:,}")

        # Top 5 vendors by summed amount, largest first.
        top_vendors = sorted(
            vendor_totals[business].items(),
            key=lambda item: item[1],
            reverse=True,
        )[:5]
        print("   Top vendors:")
        for vendor, vendor_total in top_vendors:
            # Keep long vendor names from breaking the layout.
            vendor_short = vendor[:40] + "..." if len(vendor) > 40 else vendor
            print(f"     ${vendor_total:,.2f} - {vendor_short}")

    return business_totals

def create_final_categorization_file(transactions):
    """Write the categorized transactions to SMART_CATEGORIZED_EXPENSES.csv.

    Rows are written largest amount first.  Each row carries the date, the
    amount formatted to two decimals, the vendor, the recommended business,
    a confidence level derived from that business, and an optional note
    derived from the vendor name.

    Args:
        transactions: Iterable of dicts with the keys 'date', 'amount',
            'vendor' and 'suggested_business'.
    """

    csv_file = 'SMART_CATEGORIZED_EXPENSES.csv'
    columns = ['date', 'amount', 'vendor', 'recommended_business', 'confidence', 'notes']

    # Confidence per recommended business; anything not listed is 'Low'.
    confidence_by_business = {
        'Space Monkey Partners': 'High',
        'Atomic Elevator': 'High',
        'Personal': 'High',
        'Shared': 'Medium',
    }

    # (vendor substring, note) pairs; the first substring found wins.
    note_rules = (
        ('hubspot', "Marketing platform - clearly SMP"),
        ('edwards jet', "Business aviation - travel for both companies"),
        ('education', "Likely student loan - personal"),
        ('amazon', "Need to check what was purchased"),
    )

    with open(csv_file, 'w', newline='', encoding='utf-8') as out:
        writer = csv.DictWriter(out, fieldnames=columns)
        writer.writeheader()

        # Largest amounts first.
        ordered = list(transactions)
        ordered.sort(key=lambda record: record['amount'], reverse=True)

        for record in ordered:
            business = record['suggested_business']
            confidence = confidence_by_business.get(business, 'Low')

            vendor_lower = record['vendor'].lower()
            notes = next(
                (note for needle, note in note_rules if needle in vendor_lower),
                "",
            )

            writer.writerow({
                'date': record['date'],
                'amount': f"{record['amount']:.2f}",
                'vendor': record['vendor'],
                'recommended_business': business,
                'confidence': confidence,
                'notes': notes,
            })

    print(f"\n📋 Smart categorizations saved to: {csv_file}")
    print("   Review and approve these recommendations")

def main():
    """Run the whole pipeline and print the final allocation summary.

    Categorizes the transactions, prints the per-business summary, writes
    the categorized CSV, then prints each company's allocation with shared
    expenses split 50/50 between the two businesses.
    """
    transactions = apply_smart_categorizations()
    # Called without arguments, so the summary step loads the data itself.
    business_totals = generate_categorization_summary()
    create_final_categorization_file(transactions)

    print("\n🎯 ALLOCATION SUMMARY:")

    # Direct totals per label; labels with no transactions count as 0.
    direct_atomic = business_totals.get('Atomic Elevator', 0)
    direct_smp = business_totals.get('Space Monkey Partners', 0)
    shared_total = business_totals.get('Shared', 0)
    personal_total = business_totals.get('Personal', 0)
    needs_review = business_totals.get('Review Needed', 0)

    # Each business takes half of the shared expenses.
    half_shared = shared_total * 0.5
    atomic_with_shared = direct_atomic + half_shared
    smp_with_shared = direct_smp + half_shared

    print(f"  Atomic Elevator: ${atomic_with_shared:,.2f} (direct: ${direct_atomic:,.2f} + 50% shared)")
    print(f"  Space Monkey Partners: ${smp_with_shared:,.2f} (direct: ${direct_smp:,.2f} + 50% shared)")
    print(f"  Personal (non-deductible): ${personal_total:,.2f}")
    print(f"  Still needs review: ${needs_review:,.2f}")

    combined_deductions = atomic_with_shared + smp_with_shared
    print(f"\n💰 Total Business Deductions: ${combined_deductions:,.2f}")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()