#!/usr/bin/env python3
import csv
from datetime import datetime
import re

def parse_date(date_str):
    """Parse various date formats to YYYY-MM-DD"""
    if not date_str:
        return ''
    
    # Handle MM/DD/YYYY format
    if '/' in date_str:
        try:
            return datetime.strptime(date_str, '%m/%d/%Y').strftime('%Y-%m-%d')
        except:
            try:
                return datetime.strptime(date_str, '%d/%m/%Y').strftime('%Y-%m-%d')
            except:
                pass
    
    # Handle YYYY-MM-DD format
    if '-' in date_str and len(date_str) == 10:
        return date_str
        
    return date_str

def categorize_expense(description, amount=None):
    """Categorize expense for US tax purposes"""
    desc = description.lower()
    
    # Software/Technology - Business Tools
    software_keywords = ['zapier', 'dropbox', 'adobe', 'microsoft', 'docusign', 'linkedin', 'grammarly', 
                         'shutterstock', 'wondershare', 'malwarebytes', 'avast', 'cleverbridge', 'parallels',
                         'wisestamp', 'apple services', 'software', 'saas', 'app', 'tech']
    if any(word in desc for word in software_keywords):
        return 'Software/Technology'
    
    # Travel Expenses
    travel_keywords = ['airbnb', 'uber', 'lime', 'hotel', 'flight', 'travel', 'accommodation', 'ride']
    if any(word in desc for word in travel_keywords):
        return 'Travel'
    
    # Meals & Entertainment (business meals under $75)
    meal_keywords = ['starbucks', 'restaurant', 'pizza', 'coffee', 'dining', 'food']
    if any(word in desc for word in meal_keywords):
        if amount and abs(float(str(amount).replace('-', ''))) < 75:
            return 'Meals & Entertainment'
        else:
            return 'Personal (Non-Deductible)'
    
    # Professional Services
    professional_keywords = ['lawyer', 'attorney', 'accountant', 'consultant', 'professional']
    if any(word in desc for word in professional_keywords):
        return 'Professional Services'
    
    # Marketing/Advertising
    marketing_keywords = ['marketing', 'advertising', 'constant contact', 'email marketing', 'ad']
    if any(word in desc for word in marketing_keywords):
        return 'Advertising & Marketing'
    
    # Office Expenses
    office_keywords = ['network solutions', 'hostgator', 'domain', 'hosting', 'office', 'supplies']
    if any(word in desc for word in office_keywords):
        return 'Office Expenses'
    
    # Health Care
    health_keywords = ['cost plus drugs', 'health', 'medical', 'pharmacy', 'drugs']
    if any(word in desc for word in health_keywords):
        return 'Health Care'
    
    # Interest/Fees
    fee_keywords = ['interest', 'fee', 'charge', 'autopay', 'payment']
    if any(word in desc for word in fee_keywords):
        return 'Interest/Fees'
    
    # Entertainment/Streaming (likely personal)
    entertainment_keywords = ['netflix', 'hulu', 'spotify', 'disney', 'paramount', 'peacock', 'streaming']
    if any(word in desc for word in entertainment_keywords):
        return 'Personal (Non-Deductible)'
    
    # Personal Care
    personal_keywords = ['dollar shave', 'razor', 'personal care', 'shaving']
    if any(word in desc for word in personal_keywords):
        return 'Personal (Non-Deductible)'
    
    return 'Review Needed'

def assign_business(description, category):
    """Assign to Atomic Elevator, Space Monkey Partners, or mark for review"""
    desc = description.lower()
    
    # Atomic Elevator indicators (AI/Tech business)
    ella_keywords = ['zapier', 'api', 'software', 'development', 'technical', 'ai', 'platform', 'saas', 
                     'dropbox', 'tech', 'automation', 'integration']
    if any(word in desc for word in ella_keywords):
        return 'Atomic Elevator'
    
    # Space Monkey Partners indicators (Marketing consultancy)
    smp_keywords = ['marketing', 'constant contact', 'email marketing', 'advertising', 'creative', 
                    'design', 'client', 'consulting', 'agency']
    if any(word in desc for word in smp_keywords):
        return 'Space Monkey Partners'
    
    # Shared business expenses
    shared_keywords = ['travel', 'meeting', 'professional', 'office', 'phone', 'internet', 'legal',
                       'accounting', 'insurance', 'business']
    if any(word in desc for word in shared_keywords) and category != 'Personal (Non-Deductible)':
        return 'Shared Business'
    
    # Personal expenses
    if category == 'Personal (Non-Deductible)':
        return 'Personal'
    
    return 'Review Needed'

# Process all transactions
all_transactions = []

# Capital One Card #6806 transactions (from CSV data provided)
cap1_6806_data = [
    ['2025-12-16', 'INTEREST CHARGE:PURCHASES', '225.81', 'Debit'],
    ['2025-12-15', 'COST PLUS DRUGS', '16.07', 'Debit'],
    ['2025-12-14', 'COST PLUS DRUGS', '19.43', 'Debit'],
    ['2025-12-10', 'CAPITAL ONE AUTOPAY PYMT', '317.00', 'Credit'],
    # Add more transactions as needed
]

# Capital One Card #5597 transactions  
cap1_5597_data = [
    ['2025-12-23', 'TST*RED LODGE PIZZA CO', '34.45', 'Debit'],
    ['2025-12-10', 'CAPITAL ONE AUTOPAY PYMT', '137.77', 'Credit'],
    ['2025-11-23', 'LIM*RIDE 4ZRO', '4.91', 'Debit'],
    # Add more transactions as needed
]

# PayPal major transactions for 2025
paypal_major_transactions = [
    # Sample of major transactions from the data provided
    ['07/02/2025', 'Zapier, Inc.', '-29.99', 'Pro 750 (monthly)'],
    ['07/09/2025', 'Dollar Shave Club', '-12.00', 'Club Series 6 Blade'],
    ['07/09/2025', 'Hulu', '-9.99', 'Hulu (With Ads)'],
    ['07/12/2025', 'Dropbox', '-11.99', ''],
    ['07/16/2025', 'Netflix.com', '-31.98', ''],
    ['07/19/2025', 'WONDERSHARE', '-29.99', 'Wondershare UniConverter'],
    ['07/19/2025', 'Gen Digital Inc.', '-94.99', 'Avast Cleanup Premium'],
    ['07/21/2025', 'DocuSign, Inc.', '-45.00', ''],
    ['07/22/2025', 'Peacock TV', '-7.99', 'Premium'],
    ['07/24/2025', 'A Medium Corporation', '-5.00', ''],
    ['07/24/2025', 'Samsung Electronics America, Inc.', '-4.99', ''],
    ['07/25/2025', 'PARAMOUNT+', '-7.99', 'Essential (Monthly)'],
    ['07/27/2025', 'Shutterstock Inc.', '-29.00', ''],
    ['07/29/2025', 'Spotify USA Inc', '-19.99', ''],
    ['08/02/2025', 'Zapier, Inc.', '-29.99', 'Pro 750 (monthly)'],
    ['08/12/2025', 'Dropbox', '-11.99', ''],
    ['08/16/2025', 'Netflix.com', '-31.98', ''],
    ['08/21/2025', 'Network Solutions, LLC', '-92.38', 'WEB*NETWORKSOLUTIONS.COM'],
    ['08/21/2025', 'DocuSign, Inc.', '-45.00', ''],
    ['08/24/2025', 'Headspace Inc', '-95.88', ''],
    ['08/25/2025', 'cleverbridge Inc.', '-59.99', 'Malwarebytes Standard'],
    ['09/01/2025', 'WiseStamp Technologies Ltd', '-90.00', 'B2C EC Yearly'],
    ['11/02/2025', 'Airbnb Payments Inc.', '-681.83', 'Accommodations'],
    ['11/03/2025', 'Airbnb Payments Inc.', '-910.78', 'Accommodations'],
    ['11/16/2025', 'cleverbridge Inc.', '-69.99', 'Parallels Desktop'],
    ['11/18/2025', 'Airbnb Payments Inc.', '-164.86', 'Accommodations'],
    ['11/21/2025', 'Airbnb Payments Inc.', '-180.53', 'Accommodations'],
    ['11/21/2025', 'Airbnb Payments Inc.', '-397.91', 'Accommodations'],
    ['11/25/2025', 'Disney Plus', '-189.99', 'Disney+ (with Ads)'],
    ['12/02/2025', 'Zapier, Inc.', '-29.99', 'Pro 750 (monthly)'],
    ['12/12/2025', 'Zapier, Inc.', '-29.99', 'Pro 750 (monthly)'],
    ['12/16/2025', 'Netflix.com', '-31.98', ''],
    ['12/21/2025', 'DocuSign, Inc.', '-45.00', ''],
    ['12/26/2025', 'Starbucks Coffee Company', '-80.00', 'Card Auto Reload'],
    ['12/27/2025', 'Shutterstock Inc.', '-29.00', ''],
    # Add revenue items
    ['07/30/2025', 'Constant Contact, Inc', '8.21', 'CTCT Revshare'],
    ['08/28/2025', 'Constant Contact, Inc', '8.21', 'CTCT Rev Share'],
    ['09/29/2025', 'Constant Contact, Inc', '8.21', 'Revenue Share'],
]

# Process PayPal transactions
for transaction in paypal_major_transactions:
    date, vendor, amount, description = transaction
    amount_val = float(amount)
    
    category = categorize_expense(f"{vendor} {description}", amount_val)
    business = assign_business(f"{vendor} {description}", category)
    
    all_transactions.append({
        'date': parse_date(date),
        'credit_card': 'PayPal',
        'vendor': vendor,
        'category': category,
        'description': description if description else vendor,
        'amount': amount_val,
        'business': business
    })

# Sort by date
all_transactions.sort(key=lambda x: x['date'])

# Write to CSV
with open('consolidated_expenses_complete.csv', 'w', newline='') as f:
    fieldnames = ['date', 'credit_card', 'vendor', 'category', 'description', 'amount', 'business']
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(all_transactions)

print(f"Created consolidated_expenses_complete.csv with {len(all_transactions)} transactions")

# Summary by category
print("\n=== EXPENSE SUMMARY BY CATEGORY ===")
category_totals = {}
for t in all_transactions:
    cat = t['category']
    category_totals[cat] = category_totals.get(cat, 0) + t['amount']

for cat, total in sorted(category_totals.items(), key=lambda x: x[1]):
    print(f"{cat}: ${total:,.2f}")

# Summary by business
print("\n=== EXPENSE SUMMARY BY BUSINESS ===")
business_totals = {}
for t in all_transactions:
    biz = t['business']
    business_totals[biz] = business_totals.get(biz, 0) + t['amount']

for biz, total in sorted(business_totals.items(), key=lambda x: x[1]):
    print(f"{biz}: ${total:,.2f}")

print(f"\nTotal Net Amount: ${sum(t['amount'] for t in all_transactions):,.2f}")