import gradio as gr
import pickle
import fasttext
import numpy as np
import os
import torch
import time
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
from openai import AzureOpenAI
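# Gradio demo that compares text classifiers built on several embedding
# backends (E5, E5-Instruct, Azure OpenAI, ModernBERT, GTE) plus two FastText
# baselines, streaming per-model predictions, confidences, and timings.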
# Azure OpenAI Configuration
AZURE_API_VERSION = "2024-02-01"
# Model directory
MODEL_DIR = "models"
# Initialize Azure OpenAI client
azure_client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=AZURE_API_VERSION,
    azure_endpoint=os.getenv("AZURE_OPENAI_EMBEDDING_ENDPOINT")
)
def generate_e5_embedding(text, model_name='intfloat/multilingual-e5-large'):
    """Generate E5 embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    # Add the "query: " prefix expected by E5 models
    text = f"query: {text}"
    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean pooling over token embeddings
    attention_mask = inputs['attention_mask']
    embeddings = mean_pooling(outputs.last_hidden_state, attention_mask)
    # Normalize embeddings to unit length
    embeddings = F.normalize(embeddings, p=2, dim=1)
    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time
def generate_e5_instruct_embedding(text, model_name='intfloat/multilingual-e5-large-instruct'):
    """Generate E5-instruct embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    # Add the "query: " prefix (the instruct variant also accepts a task
    # instruction prefix; the plain query prefix is kept here for consistency
    # with the base E5 function)
    text = f"query: {text}"
    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean pooling over token embeddings
    attention_mask = inputs['attention_mask']
    embeddings = mean_pooling(outputs.last_hidden_state, attention_mask)
    # Normalize embeddings to unit length
    embeddings = F.normalize(embeddings, p=2, dim=1)
    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time
def generate_modernbert_embedding(text, model_name="answerdotai/ModernBERT-base"):
    """Generate ModernBERT embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Take the [CLS] token embedding
    embeddings = outputs.last_hidden_state[:, 0, :]
    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time
def mean_pooling(token_embeddings, attention_mask):
    """Mean pooling function for E5 models."""
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
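# Note: each of the generate_*_embedding helpers in this file re-instantiates
# its tokenizer and model on every call, so model loading dominates per-request
# latency. A minimal sketch of memoizing those loads, assuming the helpers were
# refactored to call a shared loader (hypothetical; not wired into the app as
# written):
from functools import lru_cache

@lru_cache(maxsize=None)
def _load_encoder(model_name):
    """Load and cache one (tokenizer, model) pair per model name."""
    return AutoTokenizer.from_pretrained(model_name), AutoModel.from_pretrained(model_name)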
def get_azure_embedding(text):
    """Get embeddings from the Azure OpenAI API."""
    start_time = time.time()
    # text-embedding-3-large returns 3072-dimensional vectors by default
    response = azure_client.embeddings.create(
        model="text-embedding-3-large",
        input=text
    )
    inference_time = time.time() - start_time
    return np.array(response.data[0].embedding), inference_time
# Load models
def load_models():
    models = {}
    # Load pickled classifiers
    with open(os.path.join(MODEL_DIR, 'e5_classifier.pkl'), 'rb') as f:
        models['E5 Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'e5_large_instruct_classifier.pkl'), 'rb') as f:
        models['E5-Instruct Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'azure_classifier.pkl'), 'rb') as f:
        models['Azure Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'azure_knn_classifier.pkl'), 'rb') as f:
        models['Azure KNN Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'modernbert_rf_classifier.pkl'), 'rb') as f:
        models['ModernBERT RF Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'gte_classifier.pkl'), 'rb') as f:
        models['GTE Classifier'] = pickle.load(f)
    # Load FastText models
    models['FastText Raw'] = fasttext.load_model(os.path.join(MODEL_DIR, 'fasttext_raw.bin'))
    models['FastText Preprocessed'] = fasttext.load_model(os.path.join(MODEL_DIR, 'fasttext_preprocessed.bin'))
    return models
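# load_models() is called once per prediction request in
# predict_text_streaming below; wrapping it in the same lru_cache pattern as
# _load_encoder above would avoid re-reading the pickles and FastText binaries
# on every request.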
def format_results(results):
    """Format results into an HTML table for better visualization."""
    html = "<table style='width: 100%; border-collapse: collapse;'>"
    html += "<tr>"
    html += "<th style='text-align: left; padding: 8px;'>Model</th>"
    html += "<th style='text-align: left; padding: 8px;'>Prediction</th>"
    html += "<th style='text-align: left; padding: 8px;'>Confidence</th>"
    html += "<th style='text-align: left; padding: 8px;'>Time (sec)</th>"
    html += "</tr>"
    for result in results:
        color = get_confidence_color(result['confidence'])
        html += "<tr>"
        html += f"<td style='padding: 8px;'>{result['model']}</td>"
        html += f"<td style='padding: 8px;'>{result['prediction']}</td>"
        html += f"<td style='padding: 8px; color: {color};'>{result['confidence']:.4f}</td>"
        html += f"<td style='padding: 8px;'>{result['time']:.4f}</td>"
        html += "</tr>"
    html += "</table>"
    return html
def format_progress(progress_value, desc):
    """Format progress bar HTML."""
    if progress_value >= 100:
        return ""  # Return empty string when complete
    html = f"""
    <div style="width: 100%; background-color: #e0e0e0; border-radius: 4px;">
        <div style="width: {progress_value}%; background-color: #4caf50; color: white;
                    text-align: center; padding: 4px 0; border-radius: 4px;">
            {desc} {progress_value:.1f}%
        </div>
    </div>
    """
    return html
def get_confidence_color(confidence):
    """Return color based on confidence score."""
    if confidence >= 0.8:
        return "#00ff00"  # Bright green for high confidence
    elif confidence >= 0.5:
        return "#ffa500"  # Bright orange for medium confidence
    else:
        return "#ff4444"  # Bright red for low confidence
def generate_gte_embedding(text, model_name='Alibaba-NLP/gte-base'):
    """Generate GTE embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state[:, 0, :]  # [CLS] token
    embeddings = F.normalize(embeddings, p=2, dim=1)  # L2-normalize
    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time
# Make predictions (streaming version)
def predict_text_streaming(text):
    try:
        models = load_models()
        results = []
        # First yield an empty table and progress bar
        yield format_progress(0, "Loading models..."), format_results(results)
        # Process FastText models first (they're fastest, as they don't need embeddings)
        for model_name, model in models.items():
            if isinstance(model, fasttext.FastText._FastText):
                yield format_progress(10, f"Processing {model_name}..."), format_results(results)
                start_time = time.time()
                prediction = model.predict(text)
                label = prediction[0][0].replace('__label__', '')
                confidence = float(prediction[1][0])
                inference_time = time.time() - start_time
                results.append({
                    'model': model_name,
                    'prediction': label,
                    'confidence': confidence,
                    'time': inference_time
                })
                yield format_progress(20, f"Completed {model_name}"), format_results(results)
        # Process E5 models (each classifier gets the embedding it was trained on)
        yield format_progress(30, "Processing E5 Classifiers..."), format_results(results)
        e5_variants = {
            'E5 Classifier': generate_e5_embedding(text),
            'E5-Instruct Classifier': generate_e5_instruct_embedding(text),
        }
        for model_name, (embedding, embed_time) in e5_variants.items():
            start_time = time.time()
            model = models[model_name]
            embedding_2d = embedding.reshape(1, -1)
            prediction = model.predict(embedding_2d)[0]
            probabilities = model.predict_proba(embedding_2d)[0]
            confidence = max(probabilities)
            inference_time = time.time() - start_time
            results.append({
                'model': model_name,
                'prediction': prediction,
                'confidence': confidence,
                'time': inference_time + embed_time
            })
            yield format_progress(40, f"Completed {model_name}"), format_results(results)
        # Process Azure models (both classifiers share one Azure embedding)
        yield format_progress(50, "Processing Azure Embeddings..."), format_results(results)
        azure_embedding, embed_time = get_azure_embedding(text)
        for model_name in ['Azure Classifier', 'Azure KNN Classifier']:
            start_time = time.time()
            model = models[model_name]
            embedding_2d = azure_embedding.reshape(1, -1)
            prediction = model.predict(embedding_2d)[0]
            probabilities = model.predict_proba(embedding_2d)[0]
            confidence = max(probabilities)
            inference_time = time.time() - start_time
            results.append({
                'model': model_name,
                'prediction': prediction,
                'confidence': confidence,
                'time': inference_time + embed_time
            })
            yield format_progress(70, f"Completed {model_name}"), format_results(results)
        # Process ModernBERT model
        yield format_progress(80, "Processing ModernBERT RF Classifier..."), format_results(results)
        modernbert_embedding, embed_time = generate_modernbert_embedding(text)
        start_time = time.time()  # Reset timer for this classifier's prediction
        model = models['ModernBERT RF Classifier']
        embedding_2d = modernbert_embedding.reshape(1, -1)
        prediction = model.predict(embedding_2d)[0]
        probabilities = model.predict_proba(embedding_2d)[0]
        confidence = max(probabilities)
        inference_time = time.time() - start_time
        results.append({
            'model': 'ModernBERT RF Classifier',
            'prediction': prediction,
            'confidence': confidence,
            'time': inference_time + embed_time
        })
        yield format_progress(90, "Completed ModernBERT RF Classifier"), format_results(results)
        # Process GTE model
        yield format_progress(95, "Processing GTE Classifier..."), format_results(results)
        gte_embedding, embed_time = generate_gte_embedding(text)
        start_time = time.time()  # Reset timer for this classifier's prediction
        model = models['GTE Classifier']
        embedding_2d = gte_embedding.reshape(1, -1)
        prediction = model.predict(embedding_2d)[0]
        probabilities = model.predict_proba(embedding_2d)[0]
        confidence = max(probabilities)
        inference_time = time.time() - start_time
        results.append({
            'model': 'GTE Classifier',
            'prediction': prediction,
            'confidence': confidence,
            'time': inference_time + embed_time
        })
        yield format_progress(100, "Completed!"), format_results(results)
    except Exception as e:
        yield "", f"<div style='color: red;'>Error occurred: {str(e)}</div>"
# Create Gradio interface with custom CSS
css = """
.main {
gap: 0 !important;
}
.contain {
gap: 0 !important;
}
.feedback {
margin-top: 0 !important;
margin-bottom: 0 !important;
}
"""
iface = gr.Interface(
    fn=predict_text_streaming,
    inputs=gr.Textbox(label="Enter text to classify", lines=3),
    outputs=[
        gr.HTML(label="Progress"),
        gr.HTML(label="Model Predictions")
    ],
    title="Text Classification Model Comparison",
    description="Compare predictions from different text classification models (results stream as they become available)",
    theme=gr.themes.Soft(),
    css=css
)
if __name__ == "__main__":
iface.launch(debug=True)