nickprock committed on
Commit
cdf4f6a
·
verified ·
1 Parent(s): f678a51

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from sentence_transformers import SentenceTransformer
3
+ import pandas as pd
4
+ from datasets import load_dataset
5
+ from annoy import AnnoyIndex
6
+ import numpy as np
7
+
8
# Fetch the FAQ corpus and view its "train" split as a DataFrame.
dataset = load_dataset("nickprock/AIRC_FAQ")
df = pd.DataFrame(dataset["train"])

# Keep both text columns as plain Python lists; list positions double as the
# item ids stored in the Annoy indexes built further down.
questions, answers = (df[column].tolist() for column in ("question", "answer"))

# Sentence-transformers checkpoints compared side by side in the UI.
model_names = [
    "nickprock/multi-sentence-BERTino",
    "nickprock/sentence-bert-base-italian-uncased",
    "nickprock/sentence-bert-base-italian-xxl-uncased",
    "nickprock/mmarco-bert-base-italian-uncased",
]

# Every checkpoint is instantiated eagerly, once, at import time.
models = {checkpoint: SentenceTransformer(checkpoint) for checkpoint in model_names}

# model name -> Annoy index over the answers; filled in after
# build_annoy_index is defined.
annoy_indexes = dict()
26
+
27
def build_annoy_index(model_name):
    """Create an Annoy index over the FAQ answers as embedded by one model.

    Args:
        model_name: Key into the module-level ``models`` dict.

    Returns:
        A built ``AnnoyIndex`` in which item ``i`` is the embedding of
        ``answers[i]``.
    """
    vectors = models[model_name].encode(answers)

    # The vector width comes from the encoded batch itself; the "angular"
    # metric is used here as the stand-in for cosine similarity.
    index = AnnoyIndex(vectors.shape[1], "angular")
    for item_id, vector in enumerate(vectors):
        index.add_item(item_id, vector)

    index.build(10)  # forest of 10 trees
    return index
37
+
38
# Index the answers once per model at start-up so that each query only pays
# for a single encode plus a nearest-neighbour lookup.
annoy_indexes.update(
    (checkpoint, build_annoy_index(checkpoint)) for checkpoint in model_names
)
41
+
42
def find_similar_answer_annoy(question, model_name):
    """Return the stored FAQ answer closest to ``question`` for one model.

    The question is encoded with the selected model and looked up in that
    model's Annoy index; the single nearest item id is mapped back to the
    module-level ``answers`` list.

    Args:
        question: Free-text query. ``None``, empty and whitespace-only
            values are treated as "no query".
        model_name: Key into the module-level ``models`` and
            ``annoy_indexes`` dicts.

    Returns:
        The answer text of the nearest neighbour, or ``""`` when there is
        no query to search for or the index returns no neighbour.

    Raises:
        KeyError: If ``model_name`` is not a known model (only reached for
            non-blank questions).
    """
    # A blank query carries no meaning; encoding it would still yield a
    # vector and therefore surface an arbitrary answer. Bail out instead.
    if not question or not question.strip():
        return ""

    model = models[model_name]
    annoy_index = annoy_indexes[model_name]

    question_embedding = model.encode(question)
    nearest_neighbors = annoy_index.get_nns_by_vector(question_embedding, 1)

    # An index without items yields an empty list; avoid the IndexError that
    # nearest_neighbors[0] would raise.
    if not nearest_neighbors:
        return ""

    return answers[nearest_neighbors[0]]
50
+
51
def compare_models_annoy(question, model1_name, model2_name, model3_name, model4_name):
    """Run the same question through four models and collect their answers.

    Args:
        question: Query text forwarded unchanged to every lookup.
        model1_name: Model used for the first result.
        model2_name: Model used for the second result.
        model3_name: Model used for the third result.
        model4_name: Model used for the fourth result.

    Returns:
        A 4-tuple of answers, in the same order as the model arguments.
    """
    chosen_models = (model1_name, model2_name, model3_name, model4_name)
    return tuple(
        find_similar_answer_annoy(question, chosen) for chosen in chosen_models
    )
58
+
59
# --- Gradio UI -------------------------------------------------------------
# Components are created in the same order as they appear on screen:
# the question box, four model pickers, then four answer boxes.
question_box = gr.Textbox(lines=2, placeholder="Enter your question here...")

# One dropdown per comparison slot, each pre-selecting a different model.
model_pickers = [
    gr.Dropdown(model_names, value=model_names[slot], label=label)
    for slot, label in enumerate(("Model 1", "Model 2", "Model 3", "Model 4"))
]

# NOTE(review): the answer boxes are labelled with the *default* model of
# each slot at construction time; if a user picks a different model in a
# dropdown, the label above the corresponding answer does not follow.
answer_boxes = [gr.Textbox(label=model_names[slot]) for slot in range(4)]

iface = gr.Interface(
    fn=compare_models_annoy,
    inputs=[question_box, *model_pickers],
    outputs=answer_boxes,
    title="Sentence Transformer Model Comparison (Annoy)",
    description="Enter a question and compare the answers generated by different sentence-transformer models (using Annoy for faster search).",
)

# Start the web app as soon as the module is executed.
iface.launch()