import gradio as gr
import pickle
import fasttext
import numpy as np
import os
import torch
import time
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
from openai import AzureOpenAI
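# Gradio demo that compares text classifiers built on several embedding
# backends (E5, E5-Instruct, Azure OpenAI, ModernBERT, GTE) plus two FastText
# baselines, streaming per-model predictions, confidences, and timings.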
# Azure OpenAI Configuration
AZURE_API_VERSION = "2024-02-01"
# Model directory
MODEL_DIR = "models"
# Initialize Azure OpenAI client
azure_client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=AZURE_API_VERSION,
    azure_endpoint=os.getenv("AZURE_OPENAI_EMBEDDING_ENDPOINT")
)
def generate_e5_embedding(text, model_name='intfloat/multilingual-e5-large'):
    """Generate E5 embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    # Add the "query: " prefix expected by E5 models
    text = f"query: {text}"
    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean pooling over token embeddings
    attention_mask = inputs['attention_mask']
    embeddings = mean_pooling(outputs.last_hidden_state, attention_mask)
    # Normalize embeddings to unit length
    embeddings = F.normalize(embeddings, p=2, dim=1)
    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time
def generate_e5_instruct_embedding(text, model_name='intfloat/multilingual-e5-large-instruct'):
    """Generate E5-instruct embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    # Add the "query: " prefix (the instruct variant also accepts a task
    # instruction prefix; the plain query prefix is kept here for consistency
    # with the base E5 function)
    text = f"query: {text}"
    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Mean pooling over token embeddings
    attention_mask = inputs['attention_mask']
    embeddings = mean_pooling(outputs.last_hidden_state, attention_mask)
    # Normalize embeddings to unit length
    embeddings = F.normalize(embeddings, p=2, dim=1)
    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time
def generate_modernbert_embedding(text, model_name="answerdotai/ModernBERT-base"):
    """Generate ModernBERT embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # Take the [CLS] token embedding
    embeddings = outputs.last_hidden_state[:, 0, :]
    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time
def mean_pooling(token_embeddings, attention_mask):
    """Mean pooling function for E5 models."""
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
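# Note: each of the generate_*_embedding helpers in this file re-instantiates
# its tokenizer and model on every call, so model loading dominates per-request
# latency. A minimal sketch of memoizing those loads, assuming the helpers were
# refactored to call a shared loader (hypothetical; not wired into the app as
# written):
from functools import lru_cache

@lru_cache(maxsize=None)
def _load_encoder(model_name):
    """Load and cache one (tokenizer, model) pair per model name."""
    return AutoTokenizer.from_pretrained(model_name), AutoModel.from_pretrained(model_name)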
def get_azure_embedding(text):
    """Get embeddings from the Azure OpenAI API."""
    start_time = time.time()
    # text-embedding-3-large returns 3072-dimensional vectors by default
    response = azure_client.embeddings.create(
        model="text-embedding-3-large",
        input=text
    )
    inference_time = time.time() - start_time
    return np.array(response.data[0].embedding), inference_time
# Load models
def load_models():
    models = {}
    # Load pickled classifiers
    with open(os.path.join(MODEL_DIR, 'e5_classifier.pkl'), 'rb') as f:
        models['E5 Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'e5_large_instruct_classifier.pkl'), 'rb') as f:
        models['E5-Instruct Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'azure_classifier.pkl'), 'rb') as f:
        models['Azure Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'azure_knn_classifier.pkl'), 'rb') as f:
        models['Azure KNN Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'modernbert_rf_classifier.pkl'), 'rb') as f:
        models['ModernBERT RF Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'gte_classifier.pkl'), 'rb') as f:
        models['GTE Classifier'] = pickle.load(f)
    # Load FastText models
    models['FastText Raw'] = fasttext.load_model(os.path.join(MODEL_DIR, 'fasttext_raw.bin'))
    models['FastText Preprocessed'] = fasttext.load_model(os.path.join(MODEL_DIR, 'fasttext_preprocessed.bin'))
    return models
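# load_models() is called once per prediction request in
# predict_text_streaming below; wrapping it in the same lru_cache pattern as
# _load_encoder above would avoid re-reading the pickles and FastText binaries
# on every request.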
def format_results(results):
    """Format results into an HTML table for better visualization."""
    html = "<table style='width: 100%; border-collapse: collapse;'>"
    html += "<tr>"
    html += "<th style='text-align: left; padding: 8px;'>Model</th>"
    html += "<th style='text-align: left; padding: 8px;'>Prediction</th>"
    html += "<th style='text-align: left; padding: 8px;'>Confidence</th>"
    html += "<th style='text-align: left; padding: 8px;'>Time (sec)</th>"
    html += "</tr>"
    for result in results:
        color = get_confidence_color(result['confidence'])
        html += "<tr>"
        html += f"<td style='padding: 8px;'>{result['model']}</td>"
        html += f"<td style='padding: 8px;'>{result['prediction']}</td>"
        html += f"<td style='padding: 8px; color: {color};'>{result['confidence']:.4f}</td>"
        html += f"<td style='padding: 8px;'>{result['time']:.4f}</td>"
        html += "</tr>"
    html += "</table>"
    return html
def format_progress(progress_value, desc):
    """Format progress bar HTML."""
    if progress_value >= 100:
        return ""  # Return empty string when complete
    html = f"""
    <div style="width: 100%; background-color: #e0e0e0; border-radius: 4px;">
        <div style="width: {progress_value}%; background-color: #4caf50; color: white;
                    text-align: center; padding: 4px 0; border-radius: 4px;">
            {desc} {progress_value:.1f}%
        </div>
    </div>
    """
    return html
def get_confidence_color(confidence):
    """Return color based on confidence score."""
    if confidence >= 0.8:
        return "#00ff00"  # Bright green for high confidence
    elif confidence >= 0.5:
        return "#ffa500"  # Bright orange for medium confidence
    else:
        return "#ff4444"  # Bright red for low confidence
def generate_gte_embedding(text, model_name='Alibaba-NLP/gte-base'):
    """Generate GTE embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state[:, 0, :]  # [CLS] token
    embeddings = F.normalize(embeddings, p=2, dim=1)  # L2-normalize
    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time
# Make predictions (streaming version)
def predict_text_streaming(text):
    try:
        models = load_models()
        results = []
        # First yield an empty table and progress bar
        yield format_progress(0, "Loading models..."), format_results(results)
        # Process FastText models first (they're fastest, as they don't need embeddings)
        for model_name, model in models.items():
            if isinstance(model, fasttext.FastText._FastText):
                yield format_progress(10, f"Processing {model_name}..."), format_results(results)
                start_time = time.time()
                prediction = model.predict(text)
                label = prediction[0][0].replace('__label__', '')
                confidence = float(prediction[1][0])
                inference_time = time.time() - start_time
                results.append({
                    'model': model_name,
                    'prediction': label,
                    'confidence': confidence,
                    'time': inference_time
                })
                yield format_progress(20, f"Completed {model_name}"), format_results(results)
        # Process E5 models (each classifier gets the embedding it was trained on)
        yield format_progress(30, "Processing E5 Classifiers..."), format_results(results)
        e5_variants = {
            'E5 Classifier': generate_e5_embedding(text),
            'E5-Instruct Classifier': generate_e5_instruct_embedding(text),
        }
        for model_name, (embedding, embed_time) in e5_variants.items():
            start_time = time.time()
            model = models[model_name]
            embedding_2d = embedding.reshape(1, -1)
            prediction = model.predict(embedding_2d)[0]
            probabilities = model.predict_proba(embedding_2d)[0]
            confidence = max(probabilities)
            inference_time = time.time() - start_time
            results.append({
                'model': model_name,
                'prediction': prediction,
                'confidence': confidence,
                'time': inference_time + embed_time
            })
            yield format_progress(40, f"Completed {model_name}"), format_results(results)
        # Process Azure models (both classifiers share one Azure embedding)
        yield format_progress(50, "Processing Azure Embeddings..."), format_results(results)
        azure_embedding, embed_time = get_azure_embedding(text)
        for model_name in ['Azure Classifier', 'Azure KNN Classifier']:
            start_time = time.time()
            model = models[model_name]
            embedding_2d = azure_embedding.reshape(1, -1)
            prediction = model.predict(embedding_2d)[0]
            probabilities = model.predict_proba(embedding_2d)[0]
            confidence = max(probabilities)
            inference_time = time.time() - start_time
            results.append({
                'model': model_name,
                'prediction': prediction,
                'confidence': confidence,
                'time': inference_time + embed_time
            })
            yield format_progress(70, f"Completed {model_name}"), format_results(results)
        # Process ModernBERT model
        yield format_progress(80, "Processing ModernBERT RF Classifier..."), format_results(results)
        modernbert_embedding, embed_time = generate_modernbert_embedding(text)
        start_time = time.time()  # Reset timer for this classifier's prediction
        model = models['ModernBERT RF Classifier']
        embedding_2d = modernbert_embedding.reshape(1, -1)
        prediction = model.predict(embedding_2d)[0]
        probabilities = model.predict_proba(embedding_2d)[0]
        confidence = max(probabilities)
        inference_time = time.time() - start_time
        results.append({
            'model': 'ModernBERT RF Classifier',
            'prediction': prediction,
            'confidence': confidence,
            'time': inference_time + embed_time
        })
        yield format_progress(90, "Completed ModernBERT RF Classifier"), format_results(results)
        # Process GTE model
        yield format_progress(95, "Processing GTE Classifier..."), format_results(results)
        gte_embedding, embed_time = generate_gte_embedding(text)
        start_time = time.time()  # Reset timer for this classifier's prediction
        model = models['GTE Classifier']
        embedding_2d = gte_embedding.reshape(1, -1)
        prediction = model.predict(embedding_2d)[0]
        probabilities = model.predict_proba(embedding_2d)[0]
        confidence = max(probabilities)
        inference_time = time.time() - start_time
        results.append({
            'model': 'GTE Classifier',
            'prediction': prediction,
            'confidence': confidence,
            'time': inference_time + embed_time
        })
        yield format_progress(100, "Completed!"), format_results(results)
    except Exception as e:
        yield "", f"<div style='color: red;'>Error occurred: {str(e)}</div>"
# Create Gradio interface with custom CSS
css = """
.main {
gap: 0 !important;
}
.contain {
gap: 0 !important;
}
.feedback {
margin-top: 0 !important;
margin-bottom: 0 !important;
}
"""
iface = gr.Interface(
    fn=predict_text_streaming,
    inputs=gr.Textbox(label="Enter text to classify", lines=3),
    outputs=[
        gr.HTML(label="Progress"),
        gr.HTML(label="Model Predictions")
    ],
    title="Text Classification Model Comparison",
    description="Compare predictions from different text classification models (results stream as they become available)",
    theme=gr.themes.Soft(),
    css=css
)
if __name__ == "__main__":
iface.launch(debug=True)