#!/usr/bin/env python3

import zipfile
import xml.etree.ElementTree as ET
import csv
import re

def parse_shared_strings(zip_file):
    """Return the workbook's shared-string table as a list of plain strings.

    Reads ``xl/sharedStrings.xml`` from the already-open archive and yields
    one string per ``<si>`` entry, in document order, so that the list index
    matches the index stored in ``t="s"`` worksheet cells.

    An entry is either plain text (``<si><t>..</t></si>``) or rich text made
    of several formatted runs (``<si><r><t>..</t></r>...</si>``); the runs are
    concatenated. Phonetic hints (``<rPh>``) are deliberately not included.

    Args:
        zip_file: An open ``zipfile.ZipFile`` for the ``.xlsx`` workbook.

    Returns:
        The list of strings. On any error (including a missing
        ``xl/sharedStrings.xml`` part) the error is printed and whatever was
        collected so far is returned; this function does not raise.
    """
    ns = {'ss': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
    shared_strings = []
    try:
        with zip_file.open('xl/sharedStrings.xml') as f:
            # Hand the raw bytes to the parser so it honours the encoding
            # named in the XML declaration instead of assuming UTF-8.
            root = ET.fromstring(f.read())

        for si in root.findall('ss:si', ns):
            # Direct <t> covers plain entries, <r>/<t> covers rich-text runs.
            # Neither path descends into <rPh>, which keeps phonetic text out.
            text_nodes = si.findall('ss:t', ns) + si.findall('ss:r/ss:t', ns)
            shared_strings.append(''.join(t.text or '' for t in text_nodes))
    except Exception as e:
        # Best-effort by design: report and return what we have.
        print(f"Error parsing shared strings: {e}")

    return shared_strings

# Column letters of an A1-style cell reference ("C7" -> "C"). Limited to three
# letters so a malformed reference cannot request an absurd amount of padding.
_CELL_REF_RE = re.compile(r'([A-Za-z]{1,3})[0-9]*$')


def _column_index(cell_ref):
    """Return the 0-based column index of an A1-style reference, or None."""
    match = _CELL_REF_RE.match(cell_ref or '')
    if match is None:
        return None
    index = 0
    for letter in match.group(1).upper():
        # Bijective base-26: A=1 .. Z=26, AA=27, ...
        index = index * 26 + (ord(letter) - ord('A') + 1)
    return index - 1


def _cell_text(cell, shared_strings, ns):
    """Return the text of one ``<c>`` element ('' when it has no value)."""
    cell_type = cell.get('t')

    if cell_type == 'inlineStr':
        # Inline strings keep their text in <is>, not in <v>.
        inline = cell.find('ss:is', ns)
        if inline is None:
            return ''
        text_nodes = inline.findall('ss:t', ns) + inline.findall('ss:r/ss:t', ns)
        return ''.join(t.text or '' for t in text_nodes)

    v = cell.find('ss:v', ns)
    if v is None or v.text is None:
        return ''

    if cell_type == 's':
        idx = int(v.text)
        # Lower bound matters: a negative index would silently wrap around.
        if 0 <= idx < len(shared_strings):
            return shared_strings[idx]
        return ''

    return v.text


def parse_worksheet(zip_file, shared_strings, sheet_name='xl/worksheets/sheet1.xml'):
    """Return the cell values of one worksheet as a list of rows of strings.

    Each ``<row>`` that contains at least one ``<c>`` element becomes one list.
    Cells are placed according to the column of their ``r`` reference, and any
    columns the file omits (the format leaves out empty cells) are filled with
    ``''`` so that values stay under the right CSV column. Cells without an
    ``r`` attribute are appended in document order. Rows without cells are
    skipped, so row numbers in the sheet are not preserved.

    Shared-string cells (``t="s"``) are resolved through *shared_strings*;
    inline strings (``t="inlineStr"``) are read from ``<is>``; every other cell
    yields the raw text of its ``<v>`` element (formulas yield their cached
    value, numbers and dates stay in their stored textual form).

    Args:
        zip_file: An open ``zipfile.ZipFile`` for the ``.xlsx`` workbook.
        shared_strings: List returned by ``parse_shared_strings``.
        sheet_name: Archive path of the worksheet part to read.

    Returns:
        A list of rows. On any error the error is printed and the rows
        collected so far are returned; this function does not raise.
    """
    ns = {'ss': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
    rows = []
    try:
        with zip_file.open(sheet_name) as f:
            # Bytes in, so the parser honours the declared XML encoding.
            root = ET.fromstring(f.read())

        for row in root.findall('.//ss:row', ns):
            row_data = []
            for cell in row.findall('ss:c', ns):
                col = _column_index(cell.get('r'))
                if col is not None and col > len(row_data):
                    # Fill the gap left by omitted empty cells.
                    row_data.extend([''] * (col - len(row_data)))
                row_data.append(_cell_text(cell, shared_strings, ns))

            if row_data:  # Only add non-empty rows
                rows.append(row_data)
    except Exception as e:
        # Best-effort by design: report and return what we have.
        print(f"Error parsing worksheet: {e}")

    return rows

def convert_excel_to_csv(excel_file, csv_file):
    """Read one worksheet from *excel_file* and write its rows to *csv_file*.

    The workbook is opened as a zip archive, its shared strings and the
    default worksheet are parsed, and the resulting rows are written as UTF-8
    CSV. Progress and a preview of up to five rows (first ten columns each)
    are printed. Any exception is caught and reported with a printed message;
    nothing is raised or returned.
    """
    try:
        with zipfile.ZipFile(excel_file, 'r') as archive:
            # Shared strings first: the worksheet parser resolves against them.
            strings = parse_shared_strings(archive)
            print(f"Found {len(strings)} shared strings")

            sheet_rows = parse_worksheet(archive, strings)
            print(f"Found {len(sheet_rows)} rows")

            # newline='' lets the csv module control line endings itself.
            with open(csv_file, 'w', newline='', encoding='utf-8') as out:
                csv.writer(out).writerows(sheet_rows)

            print(f"Successfully converted {excel_file} to {csv_file}")

            if not sheet_rows:
                return

            # Short preview so the result can be checked by eye.
            print("First few rows:")
            for number, preview in enumerate(sheet_rows[:5], start=1):
                print(f"Row {number}: {preview[:10]}")  # First 10 columns

    except Exception as e:
        print(f"Error converting {excel_file}: {e}")

def main():
    """Convert each hard-coded inbound workbook to its CSV counterpart.

    The source paths are absolute; the CSV names are relative, so the output
    lands in the current working directory.
    """
    # Source workbook -> destination CSV, processed in the order listed.
    conversions = {
        '/home/ubuntu/.openclaw/media/inbound/file_292---794e659f-19d7-4fe2-9f0c-6332dbfcaa9f.xlsx': 'credit_card_1.csv',
        '/home/ubuntu/.openclaw/media/inbound/file_294---c2ccbad4-1116-456f-a9f8-800d69c59ab2.xlsx': 'credit_card_2.csv',
    }

    for excel_file, csv_file in conversions.items():
        print(f"\nConverting {excel_file} to {csv_file}...")
        convert_excel_to_csv(excel_file, csv_file)

# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()