#!/usr/bin/env python3

import csv
import re
from datetime import datetime

def process_internet_expenses(input_file):
    """Process the internet expenses CSV and extract individual monthly records"""
    
    expenses = []
    current_year = None
    
    with open(input_file, 'r', encoding='utf-8') as f:
        reader = csv.reader(f)
        
        for row in reader:
            if not row or len(row) < 2:
                continue
                
            # Check if this is a year row (e.g., "2024.0", "2025.0")
            if len(row) >= 1 and str(row[0]).strip().replace('.0', '').isdigit():
                current_year = int(float(row[0]))
                continue
            
            # Skip header and summary rows
            if any(x in str(row[0]).lower() for x in ['expense report', 'date', 'quarter', 'total', 'year']):
                continue
                
            # Process monthly entries
            month_name = str(row[0]).strip()
            if month_name and month_name[0].isupper() and current_year:
                # Try to extract amount from multiple columns
                amount = None
                for col_idx in [1, 2]:
                    if len(row) > col_idx and row[col_idx]:
                        try:
                            amount_str = str(row[col_idx]).strip()
                            # Remove any non-numeric characters except decimal point
                            amount_clean = re.sub(r'[^\d.]', '', amount_str)
                            if amount_clean:
                                amount = float(amount_clean)
                                break
                        except:
                            continue
                
                if amount and amount > 0:
                    # Create transaction record
                    month_num = get_month_number(month_name)
                    if month_num:
                        date_str = f"{current_year}-{month_num:02d}-15"  # Mid-month date
                        
                        expense = {
                            'Date': date_str,
                            'Amount': amount,
                            'Vendor': 'Office Internet Service',
                            'Category': 'Atomic Elevator',
                            'Description': f'Office Internet - {month_name} {current_year} (Reimbursed)',
                            'Source': 'Office Internet Expenses'
                        }
                        expenses.append(expense)
    
    return expenses

def get_month_number(month_name):
    """Convert month name to number"""
    months = {
        'january': 1, 'february': 2, 'march': 3, 'april': 4,
        'may': 5, 'june': 6, 'july': 7, 'august': 8,
        'september': 9, 'october': 10, 'november': 11, 'december': 12
    }
    return months.get(month_name.lower())

def main():
    input_file = 'office_internet_expenses.csv'
    output_file = 'processed_internet_expenses.csv'
    
    expenses = process_internet_expenses(input_file)
    
    print(f"Processed {len(expenses)} internet expense records")
    
    # Write to CSV
    if expenses:
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=expenses[0].keys())
            writer.writeheader()
            writer.writerows(expenses)
        
        print(f"Saved to {output_file}")
        
        # Show summary
        total_amount = sum(e['Amount'] for e in expenses)
        print(f"\nSummary:")
        print(f"Total Amount: ${total_amount:,.2f}")
        print(f"Date Range: {min(e['Date'] for e in expenses)} to {max(e['Date'] for e in expenses)}")
        
        print(f"\nFirst few records:")
        for expense in expenses[:5]:
            print(f"  {expense['Date']}: ${expense['Amount']:,.2f} - {expense['Description']}")
    
    return expenses

if __name__ == "__main__":
    main()