import gradio as gr
import numpy as np
import pandas as pd
from annoy import AnnoyIndex
from datasets import load_dataset
from sentence_transformers import SentenceTransformer

# Load the dataset
dataset = load_dataset("nickprock/AIRC_FAQ")
df = pd.DataFrame(dataset["train"])

# Extract questions and answers
questions = df["question"].tolist()
answers = df["answer"].tolist()

# Sentence-transformers models to test
model_names = [
    "nickprock/multi-sentence-BERTino",
    "nickprock/sentence-bert-base-italian-uncased",
    "nickprock/sentence-bert-base-italian-xxl-uncased",
    "nickprock/mmarco-bert-base-italian-uncased",
]

# Load every model once; the same instances are reused to build the indexes
# and to encode each incoming question.
models = {name: SentenceTransformer(name) for name in model_names}


def build_annoy_index(model_name, n_trees=10):
    """Build an Annoy index over the FAQ answers for one model.

    Args:
        model_name: Key into ``models`` selecting the encoder to use.
        n_trees: Number of trees passed to ``AnnoyIndex.build``.
            Defaults to 10, the value this script has always used.

    Returns:
        The built ``AnnoyIndex``. Item ``i`` of the index is the embedding
        of ``answers[i]``, so a neighbor id can be used directly as an
        index into ``answers``.
    """
    model = models[model_name]
    embeddings = model.encode(answers)
    embedding_dim = embeddings.shape[1]
    # Use angular distance for cosine similarity
    annoy_index = AnnoyIndex(embedding_dim, "angular")
    for i, embedding in enumerate(embeddings):
        annoy_index.add_item(i, embedding)
    annoy_index.build(n_trees)
    return annoy_index


# Store one Annoy index per model, keyed by model name.
annoy_indexes = {name: build_annoy_index(name) for name in model_names}


def find_similar_answer_annoy(question, model_name):
    """Return the FAQ answer nearest to ``question`` for one model.

    Args:
        question: Free-text question typed by the user.
        model_name: Key into ``models`` / ``annoy_indexes``.

    Returns:
        The entry of ``answers`` whose embedding is the nearest neighbor of
        the question embedding, or ``""`` when the question is blank or the
        index returns no neighbor.
    """
    # A blank question carries no signal; encoding it would still return
    # some arbitrary "nearest" answer, so answer with nothing instead.
    if not question or not question.strip():
        return ""

    model = models[model_name]
    annoy_index = annoy_indexes[model_name]
    question_embedding = model.encode(question)
    # Ask for a single neighbor: only the best match is shown.
    nearest_neighbors = annoy_index.get_nns_by_vector(question_embedding, 1)
    if not nearest_neighbors:
        return ""
    return answers[nearest_neighbors[0]]


def compare_models_annoy(
    question, model1_name, model2_name, model3_name, model4_name
):
    """Answer ``question`` with each of the four selected models.

    Args:
        question: Free-text question typed by the user.
        model1_name: Model chosen in the first dropdown.
        model2_name: Model chosen in the second dropdown.
        model3_name: Model chosen in the third dropdown.
        model4_name: Model chosen in the fourth dropdown.

    Returns:
        A 4-tuple of answers, in the same order as the model arguments.
    """
    selected = (model1_name, model2_name, model3_name, model4_name)
    return tuple(
        find_similar_answer_annoy(question, name) for name in selected
    )


iface = gr.Interface(
    fn=compare_models_annoy,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your question here..."),
        gr.Dropdown(model_names, value=model_names[0], label="Model 1"),
        gr.Dropdown(model_names, value=model_names[1], label="Model 2"),
        gr.Dropdown(model_names, value=model_names[2], label="Model 3"),
        gr.Dropdown(model_names, value=model_names[3], label="Model 4"),
    ],
    # Output boxes are labelled by slot rather than by a fixed model name:
    # each dropdown can be switched to any model, so a hard-coded model name
    # on the output would be wrong as soon as the selection changes.
    outputs=[
        gr.Textbox(label="Answer (Model 1)"),
        gr.Textbox(label="Answer (Model 2)"),
        gr.Textbox(label="Answer (Model 3)"),
        gr.Textbox(label="Answer (Model 4)"),
    ],
    title="Sentence Transformer Model Comparison (Annoy)",
    description="Enter a question and compare the answers generated by different sentence-transformer models (using Annoy for faster search).",
)

iface.launch()