import gradio as gr
import pickle
import fasttext
import numpy as np
import os
import torch
import time
from transformers import AutoTokenizer, AutoModel
import torch.nn.functional as F
from openai import AzureOpenAI
from dotenv import load_dotenv
from config import get_fasttext_path, is_model_enabled

load_dotenv()

# Azure OpenAI Configuration
AZURE_API_VERSION = "2024-02-01"

# Model directory
MODEL_DIR = "models"

# Initialize Azure OpenAI client
azure_client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=AZURE_API_VERSION,
    azure_endpoint=os.getenv("AZURE_OPENAI_EMBEDDING_ENDPOINT")
)


def generate_e5_embedding(text, model_name='intfloat/multilingual-e5-large'):
    """Generate E5 embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    # Add prefix for E5 models
    text = f"query: {text}"

    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean pooling
    attention_mask = inputs['attention_mask']
    embeddings = mean_pooling(outputs.last_hidden_state, attention_mask)

    # Normalize embeddings
    embeddings = F.normalize(embeddings, p=2, dim=1)

    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time


def generate_e5_instruct_embedding(text, model_name='intfloat/multilingual-e5-large-instruct'):
    """Generate E5-instruct embeddings for a single text."""
    start_time = time.time()
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    # Add prefix for E5 models
    text = f"query: {text}"

    # Tokenize and generate embedding
    inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Mean pooling
    attention_mask = inputs['attention_mask']
    embeddings = mean_pooling(outputs.last_hidden_state, attention_mask)

    # Normalize embeddings
    embeddings = F.normalize(embeddings, p=2, dim=1)

    inference_time = time.time() - start_time
    return embeddings[0].numpy(), inference_time


def mean_pooling(token_embeddings, attention_mask):
    """Mean pooling function for E5 models."""
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)


def get_azure_embedding(text):
    """Get embeddings from Azure OpenAI API."""
    start_time = time.time()
    response = azure_client.embeddings.create(
        model="text-embedding-3-large",
        input=text
    )
    inference_time = time.time() - start_time
    return np.array(response.data[0].embedding), inference_time


# Load models
def load_models():
    models = {}

    # Load pickle models only if enabled
    pickle_models = {
        'E5 Classifier': 'e5_classifier.pkl',
        'E5-Instruct Classifier': 'e5_large_instruct_classifier.pkl',
        'Azure Classifier': 'azure_classifier.pkl',
        'Azure KNN Classifier': 'azure_knn_classifier.pkl',
        'GTE Classifier': 'gte_classifier.pkl'
    }
    for model_name, filename in pickle_models.items():
        if is_model_enabled(model_name):
            with open(os.path.join(MODEL_DIR, filename), 'rb') as f:
                models[model_name] = pickle.load(f)

    # Load FastText models
    if is_model_enabled('FastText Default'):
        models['FastText Default'] = fasttext.load_model(get_fasttext_path('fasttext_default'))
    if is_model_enabled('FastText Preprocessed'):
        models['FastText Preprocessed'] = fasttext.load_model(get_fasttext_path('fasttext_preprocessed'))
    if is_model_enabled('Fasttext WordnNGram 1'):
        models['Fasttext WordnNGram 1'] = fasttext.load_model(get_fasttext_path('word_n_gram_1'))
    if is_model_enabled('Fasttext WordnNGram 2'):
        models['Fasttext WordnNGram 2'] = fasttext.load_model(get_fasttext_path('word_n_gram_2'))
    if is_model_enabled('Fasttext WordnNGram 3'):
        models['Fasttext WordnNGram 3'] = fasttext.load_model(get_fasttext_path('word_n_gram_3'))
    if is_model_enabled('Fasttext Low Overfit'):
        models['Fasttext Low Overfit'] = fasttext.load_model(get_fasttext_path('low_overfit'))

    return models
" html += "" html += "" html += "" html += "" html += "" html += "" html += "" for result in results: confidence_color = get_confidence_color(result['confidence']) html += f"" html += f"" html += f"" html += f"" html += f"" html += "" html += "
ModelPredictionConfidenceTime (sec)
{result['model']}{result['prediction']}{result['confidence']:.4f}{result['time']:.4f}
" return html def format_progress(progress_value, desc): """Format progress bar HTML.""" if progress_value >= 100: return "" # Return empty string when complete html = f"""
{desc}
{progress_value:.1f}%
""" return html def get_confidence_color(confidence): """Return color based on confidence score.""" if confidence >= 0.8: return "#00ff00" # Bright green for high confidence elif confidence >= 0.5: return "#ffa500" # Bright orange for medium confidence else: return "#ff4444" # Bright red for low confidence # [Add GTE embedding generation function] def generate_gte_embedding(text, model_name='Alibaba-NLP/gte-multilingual-base'): """Generate GTE embeddings for a single text.""" start_time = time.time() tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) model = AutoModel.from_pretrained(model_name, trust_remote_code=True) # Tokenize and generate embedding inputs = tokenizer(text, padding=True, truncation=True, return_tensors="pt") with torch.no_grad(): outputs = model(**inputs) embeddings = outputs.last_hidden_state[:, 0, :] # [CLS] token embeddings = F.normalize(embeddings, p=2, dim=1) # normalize inference_time = time.time() - start_time return embeddings[0].numpy(), inference_time # Make predictions (streaming version) def predict_text_streaming(text): try: models = load_models() results = [] if not models: return "", "
No models are enabled in the configuration.
" # Calculate progress step based on number of enabled models progress_step = 100.0 / len(models) current_progress = 0 # First yield empty table and progress bar yield format_progress(current_progress, "Loading models..."), format_results(results) # Process FastText models first (they're fastest) for model_name, model in models.items(): if isinstance(model, fasttext.FastText._FastText): yield format_progress(current_progress, f"Processing {model_name}..."), format_results(results) start_time = time.time() prediction = model.predict(text) label = prediction[0][0].replace('__label__', '') confidence = float(prediction[1][0]) inference_time = time.time() - start_time results.append({ 'model': model_name, 'prediction': label, 'confidence': confidence, 'time': inference_time }) current_progress += progress_step yield format_progress(current_progress, f"Completed {model_name}"), format_results(results) # Process E5-based models e5_embedding = None for model_name, model in models.items(): if model_name in ['E5 Classifier', 'E5-Instruct Classifier']: if e5_embedding is None: # Generate embedding only once yield format_progress(current_progress, f"Generating E5 embeddings..."), format_results(results) e5_embedding, embed_time = generate_e5_embedding(text) start_time = time.time() embedding_2d = e5_embedding.reshape(1, -1) prediction = model.predict(embedding_2d)[0] probabilities = model.predict_proba(embedding_2d)[0] confidence = max(probabilities) inference_time = time.time() - start_time results.append({ 'model': model_name, 'prediction': prediction, 'confidence': confidence, 'time': inference_time + embed_time }) current_progress += progress_step yield format_progress(current_progress, f"Completed {model_name}"), format_results(results) # Process Azure-based models azure_embedding = None for model_name, model in models.items(): if model_name in ['Azure Classifier', 'Azure KNN Classifier']: if azure_embedding is None: # Generate embedding only once yield format_progress(current_progress, "Generating Azure embeddings..."), format_results(results) azure_embedding, embed_time = get_azure_embedding(text) start_time = time.time() embedding_2d = azure_embedding.reshape(1, -1) prediction = model.predict(embedding_2d)[0] probabilities = model.predict_proba(embedding_2d)[0] confidence = max(probabilities) inference_time = time.time() - start_time results.append({ 'model': model_name, 'prediction': prediction, 'confidence': confidence, 'time': inference_time + embed_time }) current_progress += progress_step yield format_progress(current_progress, f"Completed {model_name}"), format_results(results) # Process GTE model if 'GTE Classifier' in models: yield format_progress(current_progress, "Processing GTE Classifier..."), format_results(results) gte_embedding, embed_time = generate_gte_embedding(text) model = models['GTE Classifier'] embedding_2d = gte_embedding.reshape(1, -1) prediction = model.predict(embedding_2d)[0] probabilities = model.predict_proba(embedding_2d)[0] confidence = max(probabilities) inference_time = time.time() - start_time results.append({ 'model': 'GTE Classifier', 'prediction': prediction, 'confidence': confidence, 'time': inference_time + embed_time }) current_progress = 100 yield format_progress(current_progress, "Completed!"), format_results(results) except Exception as e: yield "", f"
        yield "", f"<div style='color: red;'>Error occurred: {str(e)}</div>"


# Create Gradio interface with custom CSS
css = """
.main {
    gap: 0 !important;
}
.contain {
    gap: 0 !important;
}
.feedback {
    margin-top: 0 !important;
    margin-bottom: 0 !important;
}
"""

iface = gr.Interface(
    fn=predict_text_streaming,
    inputs=gr.Textbox(label="Enter text to classify", lines=3),
    outputs=[
        gr.HTML(label="Progress"),
        gr.HTML(label="Model Predictions")
    ],
    title="Text Classification Model Comparison",
    description="Compare predictions from different text classification models (Results stream as they become available)",
    theme=gr.themes.Soft(),
    css=css
)

if __name__ == "__main__":
    iface.launch(debug=True)