#!/usr/bin/env python3

import csv
from collections import defaultdict
from datetime import datetime

def load_and_analyze_uncategorized():
    """Load the clean data and analyze uncategorized expenses"""
    
    uncategorized = []
    
    try:
        with open('CLEAN_COMPLETE_EXPENSES_2025-2026.csv', 'r', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            
            for row in reader:
                business = row.get('business', '')
                category = row.get('category', '')
                
                # Find items that need categorization
                if business == 'Review Needed' or category == 'Other':
                    uncategorized.append({
                        'date': row['date'],
                        'amount': float(row['amount']),
                        'vendor': row['vendor'],
                        'cardholder': row['cardholder'],
                        'category': row['category'],
                        'business': row['business'],
                        'source': row['source']
                    })
    except FileNotFoundError:
        print("Clean CSV file not found. Let me recreate it...")
        return []
    
    return uncategorized

def analyze_patterns(uncategorized):
    """Analyze patterns in uncategorized expenses"""
    
    print("🔍 ANALYZING UNCATEGORIZED EXPENSES")
    print("=" * 60)
    
    total_uncategorized = sum(t['amount'] for t in uncategorized)
    print(f"Total uncategorized: ${total_uncategorized:,.2f}")
    print(f"Number of transactions: {len(uncategorized):,}")
    print()
    
    # Group by vendor for pattern recognition
    vendor_totals = defaultdict(float)
    vendor_counts = defaultdict(int)
    
    for t in uncategorized:
        vendor = t['vendor']
        vendor_totals[vendor] += t['amount']
        vendor_counts[vendor] += 1
    
    # Show top uncategorized vendors
    print("🎯 TOP UNCATEGORIZED VENDORS:")
    top_vendors = sorted(vendor_totals.items(), key=lambda x: x[1], reverse=True)[:20]
    
    for vendor, amount in top_vendors:
        count = vendor_counts[vendor]
        vendor_short = vendor[:60] + "..." if len(vendor) > 60 else vendor
        print(f"${amount:8,.2f} ({count:2d}x) - {vendor_short}")
    
    print()
    
    return top_vendors, vendor_totals

def suggest_business_categorization(vendor, amount, date_str="", source=""):
    """Suggest which business based on vendor name and context"""
    vendor_lower = vendor.lower()
    
    # Clear Atomic Elevator indicators (AI/Tech platform)
    atomic_indicators = [
        'api', 'webhook', 'automation', 'ai', 'openai', 'anthropic',
        'platform', 'saas', 'software', 'development', 'coding',
        'server', 'hosting', 'database', 'analytics', 'integration',
        'zapier', 'github', 'aws', 'azure', 'google cloud'
    ]
    
    if any(indicator in vendor_lower for indicator in atomic_indicators):
        return "Atomic Elevator", "🤖 Tech/Platform"
    
    # Clear Space Monkey Partners indicators (Marketing consultancy)
    smp_indicators = [
        'marketing', 'advertising', 'creative', 'design', 'branding',
        'social media', 'facebook', 'linkedin', 'content', 'copy',
        'client', 'campaign', 'lead gen', 'seo', 'analytics',
        'canva', 'adobe', 'stock photo', 'video'
    ]
    
    if any(indicator in vendor_lower for indicator in smp_indicators):
        return "Space Monkey Partners", "📈 Marketing/Creative"
    
    # Business context clues
    if 'paypal inst xfer' in vendor_lower:
        # Look at the service name after PayPal
        if any(x in vendor_lower for x in ['netflix', 'spotify', 'hulu']):
            return "Personal", "🎵 Entertainment"
        elif any(x in vendor_lower for x in ['dropbox', 'zapier', 'docusign']):
            return "Atomic Elevator", "🤖 Tech Tools"
        elif 'shutterstoc' in vendor_lower:
            return "Space Monkey Partners", "📈 Stock Media"
    
    # Travel - could be either business
    if any(x in vendor_lower for x in ['hotel', 'flight', 'airline', 'uber', 'airbnb']):
        return "Shared", "✈️ Travel (Document purpose)"
    
    # Professional services
    if any(x in vendor_lower for x in ['legal', 'accounting', 'insurance', 'bank']):
        return "Shared", "💼 Professional Services"
    
    # Large amounts might be business investments
    if amount > 1000:
        return "Review Large", "💰 Large expense - needs context"
    
    # Small recurring amounts might be subscriptions
    if amount < 50:
        return "Review Small", "🔄 Likely subscription"
    
    return "Unknown", "❓ Needs manual review"

def interactive_categorization(top_vendors, vendor_totals):
    """Create an interactive categorization session"""
    
    print("🎯 BUSINESS CATEGORIZATION SUGGESTIONS")
    print("=" * 60)
    print("Based on vendor names, here are my suggestions:")
    print("(AE = Atomic Elevator, SMP = Space Monkey Partners)\n")
    
    suggestions = []
    
    for vendor, amount in top_vendors[:25]:  # Top 25 vendors
        suggestion, reason = suggest_business_categorization(vendor, amount)
        
        # Short vendor name for display
        vendor_display = vendor[:50] + "..." if len(vendor) > 50 else vendor
        
        suggestions.append({
            'vendor': vendor,
            'amount': amount,
            'suggestion': suggestion,
            'reason': reason
        })
        
        # Color coding for suggestions
        if suggestion == "Atomic Elevator":
            icon = "🤖"
        elif suggestion == "Space Monkey Partners":
            icon = "📈"
        elif suggestion == "Shared":
            icon = "🤝"
        elif suggestion == "Personal":
            icon = "👤"
        else:
            icon = "❓"
        
        print(f"{icon} ${amount:8,.2f} - {vendor_display}")
        print(f"   → Suggested: {suggestion} ({reason})")
        print()
    
    return suggestions

def create_categorization_worksheet(uncategorized):
    """Create a worksheet for manual categorization"""
    
    # Export uncategorized items to a CSV for manual review
    csv_file = 'UNCATEGORIZED_FOR_REVIEW.csv'
    
    with open(csv_file, 'w', newline='', encoding='utf-8') as f:
        fieldnames = ['date', 'amount', 'vendor', 'current_category', 'suggested_business', 
                     'suggested_reason', 'your_decision', 'notes']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        
        writer.writeheader()
        
        for t in sorted(uncategorized, key=lambda x: x['amount'], reverse=True):
            suggestion, reason = suggest_business_categorization(t['vendor'], t['amount'])
            
            writer.writerow({
                'date': t['date'],
                'amount': f"{t['amount']:.2f}",
                'vendor': t['vendor'],
                'current_category': t['category'],
                'suggested_business': suggestion,
                'suggested_reason': reason,
                'your_decision': '',  # For Keith to fill in
                'notes': ''  # For Keith to add context
            })
    
    print(f"📋 Created categorization worksheet: {csv_file}")
    print("   Columns: date, amount, vendor, suggested_business, your_decision")
    print("   Fill in 'your_decision' with: AE, SMP, Shared, or Personal")

def main():
    print("🎯 BUSINESS EXPENSE CATEGORIZATION HELPER")
    print("=" * 60)
    
    # Load uncategorized expenses
    uncategorized = load_and_analyze_uncategorized()
    
    if not uncategorized:
        print("No uncategorized expenses found or file not available.")
        return
    
    # Analyze patterns
    top_vendors, vendor_totals = analyze_patterns(uncategorized)
    
    print()
    
    # Show suggestions
    suggestions = interactive_categorization(top_vendors, vendor_totals)
    
    print()
    
    # Create worksheet
    create_categorization_worksheet(uncategorized)
    
    # Summary of suggestions
    suggestion_summary = defaultdict(float)
    for s in suggestions:
        suggestion_summary[s['suggestion']] += s['amount']
    
    print(f"\n📊 SUGGESTED ALLOCATIONS (Top 25 vendors):")
    for category, amount in sorted(suggestion_summary.items(), key=lambda x: x[1], reverse=True):
        print(f"  {category}: ${amount:,.2f}")
    
    print(f"\n💡 NEXT STEPS:")
    print(f"1. Review the worksheet: UNCATEGORIZED_FOR_REVIEW.csv")
    print(f"2. Fill in 'your_decision' column with: AE, SMP, Shared, or Personal")  
    print(f"3. Add notes for context where needed")
    print(f"4. I can then apply your decisions to update the master file")

if __name__ == "__main__":
    main()