#!/usr/bin/env python3
import csv
import json
from datetime import datetime
import re
import sys

# Define US tax categories for small businesses
TAX_CATEGORIES = {
    'software': 'Software/Technology',
    'travel': 'Travel', 
    'meals': 'Meals & Entertainment',
    'office': 'Office Expenses',
    'professional': 'Professional Services',
    'marketing': 'Advertising & Marketing',
    'equipment': 'Equipment',
    'supplies': 'Supplies',
    'insurance': 'Insurance',
    'interest': 'Interest/Fees',
    'training': 'Professional Development',
    'utilities': 'Utilities',
    'health': 'Health Care',
    'personal': 'Personal (Non-Deductible)'
}

def parse_date(date_str):
    """Parse various date formats to YYYY-MM-DD"""
    if not date_str:
        return ''
    
    # Try different date formats
    formats = ['%m/%d/%Y', '%Y-%m-%d', '%d/%m/%Y', '%m-%d-%Y']
    
    for fmt in formats:
        try:
            return datetime.strptime(date_str, fmt).strftime('%Y-%m-%d')
        except ValueError:
            continue
    
    return date_str  # Return as-is if can't parse

def categorize_vendor(description, amount=None):
    """Categorize vendor/description into tax categories"""
    desc_lower = description.lower()
    
    # Software/Technology
    if any(word in desc_lower for word in ['zapier', 'dropbox', 'adobe', 'microsoft', 'netflix', 'hulu', 'spotify', 'shutterstock', 'wondershare', 'grammarly', 'docusign', 'linkedin', 'peacock', 'paramount', 'samsung', 'malwarebytes', 'avast', 'headspace', 'wisestamp', 'medium', 'apple services', 'disney', 'noom', 'software', 'app', 'subscription', 'saas']):
        return 'Software/Technology'
    
    # Travel
    if any(word in desc_lower for word in ['airbnb', 'uber', 'lime', 'hotel', 'airline', 'flight', 'rental', 'parking', 'travel', 'accommodation', 'trip']):
        return 'Travel'
    
    # Meals (if under $75, might be deductible meals)
    if any(word in desc_lower for word in ['starbucks', 'restaurant', 'pizza', 'food', 'dining', 'coffee', 'meal']) and (not amount or float(str(amount).replace('$', '').replace(',', '')) < 75):
        return 'Meals & Entertainment'
    
    # Professional Services  
    if any(word in desc_lower for word in ['attorney', 'lawyer', 'accountant', 'consultant', 'professional', 'service', 'cleverbridge', 'paypal']):
        return 'Professional Services'
    
    # Marketing/Advertising
    if any(word in desc_lower for word in ['marketing', 'advertising', 'ad', 'promotion', 'constant contact', 'email']):
        return 'Advertising & Marketing'
    
    # Health Care
    if any(word in desc_lower for word in ['health', 'medical', 'drugs', 'pharmacy', 'cost plus']):
        return 'Health Care'
    
    # Interest/Fees
    if any(word in desc_lower for word in ['interest', 'fee', 'charge', 'payment', 'credit', 'autopay']):
        return 'Interest/Fees'
    
    # Office/Supplies
    if any(word in desc_lower for word in ['office', 'supplies', 'paper', 'ink', 'equipment', 'network solutions', 'hostgator', 'domain']):
        return 'Office Expenses'
    
    # Equipment
    if any(word in desc_lower for word in ['equipment', 'computer', 'phone', 'hardware', 'device', 'electronics']):
        return 'Equipment'
    
    # Personal items (likely non-deductible)
    if any(word in desc_lower for word in ['personal', 'dollar shave', 'shave', 'razor', 'personal care', 'entertainment', 'streaming']) and not any(word in desc_lower for word in ['business', 'work', 'professional']):
        return 'Personal (Non-Deductible)'
    
    # Default to Office Expenses for business-related unclear items
    return 'Office Expenses'

def determine_business(description, amount=None):
    """Determine if expense belongs to Atomic Elevator (Ella) or Space Monkey Partners"""
    desc_lower = description.lower()
    
    # Atomic Elevator / Ella indicators
    if any(word in desc_lower for word in ['ella', 'atomic', 'elevator', 'ai', 'software', 'zapier', 'dropbox', 'technical', 'development', 'api', 'saas', 'platform']):
        return 'Atomic Elevator'
    
    # Space Monkey Partners indicators  
    if any(word in desc_lower for word in ['marketing', 'advertising', 'constant contact', 'social', 'client', 'consulting', 'agency', 'creative', 'design', 'campaign']):
        return 'Space Monkey Partners'
    
    # Shared business expenses (could be either - will need manual review)
    if any(word in desc_lower for word in ['office', 'internet', 'phone', 'utilities', 'professional', 'legal', 'accounting', 'insurance', 'travel', 'meeting']):
        return 'Shared/Review Needed'
    
    # Personal expenses
    if any(word in desc_lower for word in ['personal', 'entertainment', 'streaming', 'gaming', 'personal care']):
        return 'Personal'
    
    return 'Review Needed'

def process_capital_one_csv(filename, card_name):
    """Process Capital One CSV format"""
    transactions = []
    
    with open(filename, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            if not row.get('Description'):
                continue
                
            amount = 0
            if row.get('Debit'):
                amount = -float(row['Debit'])
            elif row.get('Credit'):
                amount = float(row['Credit'])
            
            if amount == 0:
                continue
                
            transaction = {
                'date': parse_date(row.get('Transaction Date', '')),
                'credit_card': card_name,
                'vendor': row['Description'].strip(),
                'description': row['Description'].strip(),
                'amount': amount,
                'category': categorize_vendor(row['Description'], amount),
                'business': determine_business(row['Description'], amount)
            }
            transactions.append(transaction)
    
    return transactions

def process_paypal_csv(filename, card_name):
    """Process PayPal CSV format"""
    transactions = []
    
    with open(filename, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            if not row.get('Name'):
                continue
                
            gross = row.get('Gross', '0').replace(',', '')
            try:
                amount = float(gross)
            except:
                amount = 0
                
            if amount == 0:
                continue
                
            vendor = row['Name'].strip()
            description = row.get('Item Title', vendor).strip()
            
            transaction = {
                'date': parse_date(row.get('Date', '')),
                'credit_card': card_name,
                'vendor': vendor,
                'description': description,
                'amount': amount,
                'category': categorize_vendor(vendor, amount),
                'business': determine_business(vendor, amount)
            }
            transactions.append(transaction)
    
    return transactions

# Main processing
all_transactions = []

# Process Capital One files
print("Processing Capital One Card #6806...")
try:
    cap1_6806 = process_capital_one_csv('file_282---790498e8-4ed5-45a3-9502-098f1aed5330.csv', 'Capital One #6806')
    all_transactions.extend(cap1_6806)
    print(f"Added {len(cap1_6806)} transactions from Card #6806")
except Exception as e:
    print(f"Error processing Card #6806: {e}")

print("Processing Capital One Card #5597...")
try:
    cap1_5597 = process_capital_one_csv('file_283---b9dff3e5-c79e-469d-b9e3-13cdab3318d5.csv', 'Capital One #5597')
    all_transactions.extend(cap1_5597)
    print(f"Added {len(cap1_5597)} transactions from Card #5597")
except Exception as e:
    print(f"Error processing Card #5597: {e}")

print("Processing PayPal 2025...")
try:
    paypal_2025 = process_paypal_csv('file_291---c862d3c3-5d03-4e20-9f6e-1f1240e59fee.csv', 'PayPal')
    all_transactions.extend(paypal_2025)
    print(f"Added {len(paypal_2025)} transactions from PayPal 2025")
except Exception as e:
    print(f"Error processing PayPal 2025: {e}")

print("Processing PayPal Recent...")
try:
    paypal_recent = process_paypal_csv('file_290---5da83e1d-dd22-48b6-975f-4acf4a683153.csv', 'PayPal')
    all_transactions.extend(paypal_recent)
    print(f"Added {len(paypal_recent)} transactions from PayPal Recent")
except Exception as e:
    print(f"Error processing PayPal Recent: {e}")

# Sort by date
all_transactions.sort(key=lambda x: x['date'] if x['date'] else '1900-01-01')

# Write consolidated CSV
output_filename = 'consolidated_expenses_2025-2026.csv'
with open(output_filename, 'w', newline='') as f:
    fieldnames = ['date', 'credit_card', 'vendor', 'category', 'description', 'amount', 'business']
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(all_transactions)

print(f"\nConsolidated {len(all_transactions)} transactions into {output_filename}")

# Summary statistics
total_amount = sum(t['amount'] for t in all_transactions)
print(f"Total amount: ${total_amount:,.2f}")

# Category breakdown
category_totals = {}
for t in all_transactions:
    cat = t['category']
    category_totals[cat] = category_totals.get(cat, 0) + t['amount']

print("\nExpenses by Category:")
for cat, amount in sorted(category_totals.items(), key=lambda x: x[1]):
    print(f"  {cat}: ${amount:,.2f}")

# Business breakdown  
business_totals = {}
for t in all_transactions:
    biz = t['business']
    business_totals[biz] = business_totals.get(biz, 0) + t['amount']

print("\nExpenses by Business:")
for biz, amount in sorted(business_totals.items(), key=lambda x: x[1]):
    print(f"  {biz}: ${amount:,.2f}")