# Text-classification model comparison demo (Gradio app).
# Dependencies, per the imports below: gradio, fasttext, numpy, torch,
# transformers, openai (plus the library that produced the pickled
# classifiers, e.g. scikit-learn).

import gradio as gr
import pickle
import fasttext
import numpy as np
import os
import torch
import time
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
from openai import AzureOpenAI

# Azure OpenAI Configuration
AZURE_API_VERSION = "2024-02-01"

# Model directory
MODEL_DIR = "models"

# Initialize Azure OpenAI client (environment variable names must be passed
# to os.getenv() as strings)
azure_client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=AZURE_API_VERSION,
    azure_endpoint=os.getenv("AZURE_OPENAI_EMBEDDING_ENDPOINT")
)


def generate_e5_embedding(text, model_name='intfloat/multilingual-e5-large'):
    """Generate E5 embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    # Add prefix for E5 models
    text = f"query: {text}"

    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean pooling
    attention_mask = inputs['attention_mask']
    embeddings = mean_pooling(outputs.last_hidden_state, attention_mask)

    # Normalize embeddings
    embeddings = F.normalize(embeddings, p=2, dim=1)

    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time


def generate_e5_instruct_embedding(text, model_name='intfloat/multilingual-e5-large-instruct'):
    """Generate E5-instruct embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    # Add prefix for E5 models
    text = f"query: {text}"

    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean pooling
    attention_mask = inputs['attention_mask']
    embeddings = mean_pooling(outputs.last_hidden_state, attention_mask)

    # Normalize embeddings
    embeddings = F.normalize(embeddings, p=2, dim=1)

    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time


def generate_modernbert_embedding(text, model_name="answerdotai/ModernBERT-base"):
    """Generate ModernBERT embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Take [CLS] token embedding
    embeddings = outputs.last_hidden_state[:, 0, :]

    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time


def mean_pooling(token_embeddings, attention_mask):
    """Mean pooling function for E5 models."""
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)


def get_azure_embedding(text):
    """Get embeddings from Azure OpenAI API."""
    start_time = time.time()
    response = azure_client.embeddings.create(
        model="text-embedding-3-large",
        input=text
    )
    inference_time = time.time() - start_time
    return np.array(response.data[0].embedding), inference_time


# Load models
def load_models():
    models = {}

    # Load pickle models
    with open(os.path.join(MODEL_DIR, 'e5_classifier.pkl'), 'rb') as f:
        models['E5 Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'e5_large_instruct_classifier.pkl'), 'rb') as f:
        models['E5-Instruct Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'azure_classifier.pkl'), 'rb') as f:
        models['Azure Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'azure_knn_classifier.pkl'), 'rb') as f:
        models['Azure KNN Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'modernbert_rf_classifier.pkl'), 'rb') as f:
        models['ModernBERT RF Classifier'] = pickle.load(f)
    with open(os.path.join(MODEL_DIR, 'gte_classifier.pkl'), 'rb') as f:
        models['GTE Classifier'] = pickle.load(f)

    # Load FastText models
    models['FastText Raw'] = fasttext.load_model(os.path.join(MODEL_DIR, 'fasttext_raw.bin'))
    models['FastText Preprocessed'] = fasttext.load_model(os.path.join(MODEL_DIR, 'fasttext_preprocessed.bin'))

    return models
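# Performance note: load_models() re-reads every pickle and FastText binary,
# and each generate_*_embedding() call re-instantiates its tokenizer and
# model. A minimal caching sketch (an optional addition, not wired into the
# app below) would memoize the loader with functools.lru_cache:
#
#   from functools import lru_cache
#
#   @lru_cache(maxsize=1)
#   def load_models_cached():
#       return load_models()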
def format_results(results):
    """Format results into HTML for better visualization."""
    # Build a simple self-contained HTML table (inline styles, no external CSS)
    html = "<table style='width: 100%; border-collapse: collapse;'>"
    html += "<tr>"
    html += "<th style='text-align: left; padding: 8px; border-bottom: 2px solid #888;'>Model</th>"
    html += "<th style='text-align: left; padding: 8px; border-bottom: 2px solid #888;'>Prediction</th>"
    html += "<th style='text-align: left; padding: 8px; border-bottom: 2px solid #888;'>Confidence</th>"
    html += "<th style='text-align: left; padding: 8px; border-bottom: 2px solid #888;'>Time (sec)</th>"
    html += "</tr>"
    for result in results:
        color = get_confidence_color(result['confidence'])
        html += "<tr>"
        html += f"<td style='padding: 8px; border-bottom: 1px solid #ccc;'>{result['model']}</td>"
        html += f"<td style='padding: 8px; border-bottom: 1px solid #ccc;'>{result['prediction']}</td>"
        html += f"<td style='padding: 8px; border-bottom: 1px solid #ccc; color: {color};'>{result['confidence']:.4f}</td>"
        html += f"<td style='padding: 8px; border-bottom: 1px solid #ccc;'>{result['time']:.4f}</td>"
        html += "</tr>"
    html += "</table>"
    return html


def format_progress(progress_value, desc):
    """Format progress bar HTML."""
    if progress_value >= 100:
        return ""  # Return empty string when complete
    html = f"""
    <div style="margin: 8px 0;">
        <div style="margin-bottom: 4px;">{desc}</div>
        <div style="width: 100%; background-color: #e0e0e0; border-radius: 4px;">
            <div style="width: {progress_value}%; background-color: #2196f3; color: white;
                        text-align: center; border-radius: 4px; padding: 2px 0;">
                {progress_value:.1f}%
            </div>
        </div>
    </div>
    """
    return html
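# Example of the two helpers above with hypothetical values (this is the pair
# that predict_text_streaming() below yields on every update):
#
#   progress_html = format_progress(50.0, "Processing Azure Embeddings...")
#   table_html = format_results([{'model': 'FastText Raw', 'prediction': 'positive',
#                                 'confidence': 0.93, 'time': 0.0021}])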
def get_confidence_color(confidence):
    """Return color based on confidence score."""
    if confidence >= 0.8:
        return "#00ff00"  # Bright green for high confidence
    elif confidence >= 0.5:
        return "#ffa500"  # Bright orange for medium confidence
    else:
        return "#ff4444"  # Bright red for low confidence


# [Add GTE embedding generation function]
def generate_gte_embedding(text, model_name='Alibaba-NLP/gte-base'):
    """Generate GTE embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    embeddings = outputs.last_hidden_state[:, 0, :]  # [CLS] token
    embeddings = F.normalize(embeddings, p=2, dim=1)  # normalize

    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time


# Make predictions (streaming version)
def predict_text_streaming(text):
    try:
        results = []

        # Yield the empty table and progress bar before the slow model loading
        yield format_progress(0, "Loading models..."), format_results(results)
        models = load_models()

        # Process FastText models first (they're fastest as they don't need embeddings)
        for model_name, model in models.items():
            if isinstance(model, fasttext.FastText._FastText):
                yield format_progress(10, f"Processing {model_name}..."), format_results(results)
                start_time = time.time()
                # FastText's predict() rejects newlines, so flatten multi-line input
                prediction = model.predict(text.replace('\n', ' '))
                label = prediction[0][0].replace('__label__', '')
                confidence = float(prediction[1][0])
                inference_time = time.time() - start_time
                results.append({
                    'model': model_name,
                    'prediction': label,
                    'confidence': confidence,
                    'time': inference_time
                })
                yield format_progress(20, f"Completed {model_name}"), format_results(results)

        # Process E5 models; each classifier gets embeddings from its matching E5 model
        yield format_progress(30, "Processing E5 Classifier..."), format_results(results)
        e5_embeddings = {
            'E5 Classifier': generate_e5_embedding(text),
            'E5-Instruct Classifier': generate_e5_instruct_embedding(text),
        }
        for model_name, (embedding, embed_time) in e5_embeddings.items():
            start_time = time.time()
            model = models[model_name]
            embedding_2d = embedding.reshape(1, -1)
            prediction = model.predict(embedding_2d)[0]
            probabilities = model.predict_proba(embedding_2d)[0]
            confidence = max(probabilities)
            inference_time = time.time() - start_time
            results.append({
                'model': model_name,
                'prediction': prediction,
                'confidence': confidence,
                'time': inference_time + embed_time
            })
            yield format_progress(40, f"Completed {model_name}"), format_results(results)

        # Process Azure models
        yield format_progress(50, "Processing Azure Embeddings..."), format_results(results)
        azure_embedding, embed_time = get_azure_embedding(text)
        for model_name in ['Azure Classifier', 'Azure KNN Classifier']:
            start_time = time.time()
            model = models[model_name]
            embedding_2d = azure_embedding.reshape(1, -1)
            prediction = model.predict(embedding_2d)[0]
            probabilities = model.predict_proba(embedding_2d)[0]
            confidence = max(probabilities)
            inference_time = time.time() - start_time
            results.append({
                'model': model_name,
                'prediction': prediction,
                'confidence': confidence,
                'time': inference_time + embed_time
            })
            yield format_progress(70, f"Completed {model_name}"), format_results(results)

        # Process ModernBERT model
        yield format_progress(80, "Processing ModernBERT RF Classifier..."), format_results(results)
        modernbert_embedding, embed_time = generate_modernbert_embedding(text)
        model = models['ModernBERT RF Classifier']
        embedding_2d = modernbert_embedding.reshape(1, -1)
        start_time = time.time()
        prediction = model.predict(embedding_2d)[0]
        probabilities = model.predict_proba(embedding_2d)[0]
        confidence = max(probabilities)
        inference_time = time.time() - start_time
        results.append({
            'model': 'ModernBERT RF Classifier',
            'prediction': prediction,
            'confidence': confidence,
            'time': inference_time + embed_time
        })
        yield format_progress(90, "Completed ModernBERT RF Classifier"), format_results(results)

        # Process GTE model
        yield format_progress(95, "Processing GTE Classifier..."), format_results(results)
        gte_embedding, embed_time = generate_gte_embedding(text)
        model = models['GTE Classifier']
        embedding_2d = gte_embedding.reshape(1, -1)
        start_time = time.time()
        prediction = model.predict(embedding_2d)[0]
        probabilities = model.predict_proba(embedding_2d)[0]
        confidence = max(probabilities)
        inference_time = time.time() - start_time
        results.append({
            'model': 'GTE Classifier',
            'prediction': prediction,
            'confidence': confidence,
            'time': inference_time + embed_time
        })
        yield format_progress(100, "Completed!"), format_results(results)

    except Exception as e:
        yield "", f"<div style='color: #ff4444; font-weight: bold;'>Error occurred: {str(e)}</div>"
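# Gradio treats a generator function as a streaming endpoint: each yield of
# (progress_html, results_html) updates the two gr.HTML outputs in place.
# Minimal standalone check of the generator (with a hypothetical input text):
#
#   for progress_html, results_html in predict_text_streaming("example input"):
#       print(len(progress_html), len(results_html))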
# Create Gradio interface with custom CSS
css = """
.main {
    gap: 0 !important;
}
.contain {
    gap: 0 !important;
}
.feedback {
    margin-top: 0 !important;
    margin-bottom: 0 !important;
}
"""

iface = gr.Interface(
    fn=predict_text_streaming,
    inputs=gr.Textbox(label="Enter text to classify", lines=3),
    outputs=[
        gr.HTML(label="Progress"),
        gr.HTML(label="Model Predictions")
    ],
    title="Text Classification Model Comparison",
    description="Compare predictions from different text classification models (Results stream as they become available)",
    theme=gr.themes.Soft(),
    css=css
)

if __name__ == "__main__":
    iface.launch(debug=True)