#!/usr/bin/env python3

import csv
import re

def extract_location_from_vendor(vendor_name):
    """Extract city/state from vendor name patterns"""
    
    # Common location patterns in vendor names
    location_patterns = {
        # Montana locations
        'Bozeman': 'Bozeman, MT',
        'Billings': 'Billings, MT', 
        'Big Sky': 'Big Sky, MT',
        'Red Lodge': 'Red Lodge, MT',
        'Beartooth': 'Beartooth, MT',
        'Montana': 'Montana',
        'Yellowstone': 'Yellowstone, MT/WY',
        
        # Other known locations
        'Denver': 'Denver, CO',
        'Vegas': 'Las Vegas, NV',
        'Venetian': 'Las Vegas, NV',
        'Palazzo': 'Las Vegas, NV',
        'Edwards': 'Edwards, CO',
        'Vail': 'Vail, CO',
        'Aspen': 'Aspen, CO',
        
        # Airlines/Travel
        'Southwest': 'Airlines',
        'United': 'Airlines', 
        'American': 'Airlines',
        'Delta': 'Airlines',
        'JetBlue': 'Airlines',
        
        # Tech/Software (often remote/online)
        'Apple': 'Online/Cupertino, CA',
        'Google': 'Online/Mountain View, CA',
        'Microsoft': 'Online/Redmond, WA',
        'Adobe': 'Online/San Jose, CA',
        'Zoom': 'Online/San Jose, CA',
        'Slack': 'Online/San Francisco, CA',
        'Hubspot': 'Online/Cambridge, MA',
        'PayPal': 'Online/San Jose, CA',
        'Amazon': 'Online/Seattle, WA',
        'Netflix': 'Online/Los Gatos, CA',
        'LinkedIn': 'Online/Sunnyvale, CA',
        'Zapier': 'Online/San Francisco, CA',
        'Canva': 'Online/Sydney, AU',
        'Grammarly': 'Online/San Francisco, CA',
        'DocuSign': 'Online/San Francisco, CA',
        'Landbot': 'Online/Barcelona, ES',
        'Fathom': 'Online',
        'Starlink': 'Online/Hawthorne, CA',
        
        # Chain stores (multiple locations)
        'Starbucks': 'Chain - Multiple',
        'Best Buy': 'Chain - Multiple',
        'Costco': 'Chain - Multiple', 
        'REI': 'Chain - Multiple',
        'Ace Hardware': 'Chain - Multiple',
        'Nespresso': 'Chain - Multiple',
        
        # Medical
        'West Clinic': 'Medical Facility',
        'Intermountain': 'Intermountain Region',
        
        # Gas stations
        'Shell': 'Gas Station',
        'Exxon': 'Gas Station', 
        'Sinclair': 'Gas Station',
        'Mobil': 'Gas Station',
    }
    
    vendor_lower = vendor_name.lower()
    
    # Check for direct matches
    for pattern, location in location_patterns.items():
        if pattern.lower() in vendor_lower:
            return location
    
    # Check for state abbreviations
    state_patterns = {
        ' mt': 'Montana',
        ' co': 'Colorado', 
        ' ca': 'California',
        ' nv': 'Nevada',
        ' ut': 'Utah',
        ' wy': 'Wyoming',
        ' id': 'Idaho',
    }
    
    for pattern, state in state_patterns.items():
        if pattern in vendor_lower:
            return state
    
    # Check for specific known vendors with locations
    specific_vendors = {
        'department of education': 'Federal/Online',
        'storybrand': 'Nashville, TN', 
        'jeppesen': 'Englewood, CO',
        'murdochs': 'Montana/Wyoming Chain',
        'town & country': 'Local Grocery',
        'bridger brewing': 'Bozeman, MT',
        'summit cigar': 'Bozeman, MT',
        'lone mountain ranch': 'Big Sky, MT',
        'spoke shop': 'Travel/Bike Shop',
        'pilot shop': 'Aviation Supply',
        'divers den': 'Scuba/Diving',
        'klarna': 'Online Payment Service',
        'intuit': 'Online/Mountain View, CA',
        'verizon': 'Telecom - National',
    }
    
    for pattern, location in specific_vendors.items():
        if pattern in vendor_lower:
            return location
    
    return ''  # Unknown location

def create_categorization_with_location():
    """Create categorization file with location column"""
    
    # Keith's categorization decisions so far
    keith_decisions = {
        'Edwards Jet Center': 'Space Monkey Partners',
        'Verizon': 'Space Monkey Partners', 
        'Storybrand Don Miller': 'Atomic Elevator',
        'Department of Education Educ': 'Personal',
        'Hubspot Inc': 'Atomic Elevator',
        'Amazon': 'Space Monkey Partners',
        'West Clinic': 'Personal',
        'Divers Den Investments': 'Personal',
        'Venetian Palazzo Room': 'Space Monkey Partners',
        'Beartooth Market': 'Space Monkey Partners',
        'The Spoke Shop': 'Space Monkey Partners',
        'Res': 'Space Monkey Partners',
        'Candrug': 'Personal',
        'SSBTRUSTOPS P/R Contr': 'Space Monkey Partners',
        'The Pilot Shop': 'Space Monkey Partners',
        'Fathom Video': 'Atomic Elevator',
        'Ace Hardware': 'Atomic Elevator',
        'Nespresso': 'Space Monkey Partners',
        'The Local West End': 'Atomic Elevator',
        'Yellowstone Wildlife': 'Personal',
        'Exxon Mobil': 'Space Monkey Partners',
    }
    
    # Read the original review file
    vendors_data = []
    with open('EXPENSES_FOR_KEITH_REVIEW.csv', 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        vendors_data = list(reader)
    
    print(f"Loaded {len(vendors_data)} vendors from review file")
    
    # Apply Keith's decisions and add location info
    categorized_count = 0
    remaining_count = 0
    categorized_amount = 0
    remaining_amount = 0
    
    for vendor in vendors_data:
        vendor_name = vendor['vendor'].strip()
        amount = float(vendor['total_amount'])
        
        # Apply Keith's decision if we have one
        if vendor_name in keith_decisions:
            vendor['keith_decision'] = keith_decisions[vendor_name]
            categorized_count += 1
            categorized_amount += amount
        else:
            vendor['keith_decision'] = ''  # Empty for Keith to fill
            remaining_count += 1
            remaining_amount += amount
        
        # Add location information
        vendor['location'] = extract_location_from_vendor(vendor_name)
    
    # Sort by total amount (highest first)
    vendors_data.sort(key=lambda x: float(x['total_amount']), reverse=True)
    
    # Create final categorization file with location
    output_file = 'KEITH_EXPENSE_CATEGORIZATION_WITH_LOCATION.csv'
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        fieldnames = [
            'vendor',
            'location',
            'total_amount', 
            'transaction_count',
            'sample_date',
            'sample_amount',
            'sample_description',
            'keith_decision'
        ]
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        
        for vendor in vendors_data:
            writer.writerow({
                'vendor': vendor['vendor'],
                'location': vendor['location'],
                'total_amount': vendor['total_amount'],
                'transaction_count': vendor['transaction_count'],
                'sample_date': vendor['sample_date'],
                'sample_amount': vendor['sample_amount'],
                'sample_description': vendor['sample_description'],
                'keith_decision': vendor['keith_decision']
            })
    
    print(f"Created {output_file}")
    print(f"\nProgress Summary:")
    print(f"Categorized: {categorized_count} vendors (${categorized_amount:,.2f})")
    print(f"Remaining: {remaining_count} vendors (${remaining_amount:,.2f})")
    
    # Show next 20 vendors with location info
    print(f"\nNext 20 vendors Keith needs to categorize:")
    count = 0
    for vendor in vendors_data:
        if not vendor['keith_decision']:
            count += 1
            if count <= 20:
                location_str = f" ({vendor['location']})" if vendor['location'] else ""
                print(f"{count:2d}. {vendor['vendor']:<35} {location_str:<20} ${float(vendor['total_amount']):>8,.2f}")
            else:
                break
    
    print(f"\nFILE READY: {output_file}")

def main():
    create_categorization_with_location()

if __name__ == "__main__":
    main()