nickprock committed on
Commit
23ef606
·
verified ·
1 Parent(s): 950925d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -22
app.py CHANGED
@@ -19,29 +19,13 @@ try:
19
  "nickprock/multi-sentence-BERTino",
20
  "nickprock/sentence-bert-base-italian-uncased",
21
  "nickprock/sentence-bert-base-italian-xxl-uncased",
22
- "nickprock/mmarco-bert-base-italian-uncased",
23
  ]
24
 
25
  models = {name: SentenceTransformer(name) for name in model_names}
26
  annoy_indexes1 = {} # Store Annoy indexes for sentence1
27
  annoy_indexes2 = {} # Store Annoy indexes for sentence2
28
 
29
- def build_annoy_index(model_name, sentences):
30
- """Builds an Annoy index for a given model and sentences."""
31
- model = models[model_name]
32
- embeddings = model.encode(sentences)
33
- embedding_dim = embeddings.shape[1]
34
- annoy_index = AnnoyIndex(embedding_dim, "angular") # Use angular distance for cosine similarity
35
- for i, embedding in enumerate(embeddings):
36
- annoy_index.add_item(i, embedding)
37
- annoy_index.build(10) # Build with 10 trees
38
- return annoy_index
39
-
40
- # Build Annoy indexes for each model
41
- for model_name in model_names:
42
- annoy_indexes1[model_name] = build_annoy_index(model_name, sentences1)
43
- annoy_indexes2[model_name] = build_annoy_index(model_name, sentences2)
44
-
45
  def find_similar_sentence_annoy(sentence, model_name, sentence_list, annoy_index):
46
  """Finds the most similar sentence using Annoy."""
47
  model = models[model_name]
@@ -50,10 +34,18 @@ try:
50
  best_sentence_index = nearest_neighbors[0]
51
  return sentence_list[best_sentence_index]
52
 
 
 
 
 
 
 
 
53
  def compare_models_annoy(sentence, model1_name, model2_name, model3_name, model4_name):
54
  """Compares the results of different models using Annoy."""
55
  sentence1_results = {}
56
  sentence2_results = {}
 
57
 
58
  sentence1_results[model1_name] = find_similar_sentence_annoy(sentence, model1_name, sentences1, annoy_indexes1)
59
  sentence1_results[model2_name] = find_similar_sentence_annoy(sentence, model2_name, sentences1, annoy_indexes1)
@@ -65,21 +57,30 @@ try:
65
  sentence2_results[model3_name] = find_similar_sentence_annoy(sentence, model3_name, sentences2, annoy_indexes2)
66
  sentence2_results[model4_name] = find_similar_sentence_annoy(sentence, model4_name, sentences2, annoy_indexes2)
67
 
68
- return sentence1_results, sentence2_results
 
 
 
 
 
 
69
 
70
- def format_results(sentence1_results, sentence2_results):
71
  """Formats the results for display in Gradio."""
72
  output_text = ""
73
  for model_name in model_names:
74
  output_text += f"**{model_name}**\n"
75
  output_text += f"Most Similar Sentence from sentence1: {sentence1_results[model_name]}\n"
76
- output_text += f"Most Similar Sentence from sentence2: {sentence2_results[model_name]}\n\n"
 
77
  return output_text
78
 
79
  def gradio_interface(sentence, model1_name, model2_name, model3_name, model4_name):
80
  """Gradio interface function."""
81
- sentence1_results, sentence2_results = compare_models_annoy(sentence, model1_name, model2_name, model3_name, model4_name)
82
- return format_results(sentence1_results, sentence2_results)
 
 
83
 
84
  iface = gr.Interface(
85
  fn=gradio_interface,
 
19
  "nickprock/multi-sentence-BERTino",
20
  "nickprock/sentence-bert-base-italian-uncased",
21
  "nickprock/sentence-bert-base-italian-xxl-uncased",
22
+ "nickprock/Italian-ModernBERT-base-embed-mmarco-mnrl",
23
  ]
24
 
25
  models = {name: SentenceTransformer(name) for name in model_names}
26
  annoy_indexes1 = {} # Store Annoy indexes for sentence1
27
  annoy_indexes2 = {} # Store Annoy indexes for sentence2
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def find_similar_sentence_annoy(sentence, model_name, sentence_list, annoy_index):
30
  """Finds the most similar sentence using Annoy."""
31
  model = models[model_name]
 
34
  best_sentence_index = nearest_neighbors[0]
35
  return sentence_list[best_sentence_index]
36
 
37
+ def calculate_similarity(sentence1, sentence2, model):
38
+ """Calculates the cosine similarity between two sentences using a given model."""
39
+ embedding1 = model.encode(sentence1, convert_to_tensor=True)
40
+ embedding2 = model.encode(sentence2, convert_to_tensor=True)
41
+ similarity = util.cos_sim(embedding1, embedding2).item()
42
+ return similarity
43
+
44
  def compare_models_annoy(sentence, model1_name, model2_name, model3_name, model4_name):
45
  """Compares the results of different models using Annoy."""
46
  sentence1_results = {}
47
  sentence2_results = {}
48
+ similarity_results = {}
49
 
50
  sentence1_results[model1_name] = find_similar_sentence_annoy(sentence, model1_name, sentences1, annoy_indexes1)
51
  sentence1_results[model2_name] = find_similar_sentence_annoy(sentence, model2_name, sentences1, annoy_indexes1)
 
57
  sentence2_results[model3_name] = find_similar_sentence_annoy(sentence, model3_name, sentences2, annoy_indexes2)
58
  sentence2_results[model4_name] = find_similar_sentence_annoy(sentence, model4_name, sentences2, annoy_indexes2)
59
 
60
+ # Calculate similarity between the retrieved sentences
61
+ for model_name in model_names:
62
+ similarity_results[model_name] = calculate_similarity(
63
+ sentence1_results[model_name], sentence2_results[model_name], models[model_name]
64
+ )
65
+
66
+ return sentence1_results, sentence2_results, similarity_results
67
 
68
+ def format_results(sentence1_results, sentence2_results, similarity_results):
69
  """Formats the results for display in Gradio."""
70
  output_text = ""
71
  for model_name in model_names:
72
  output_text += f"**{model_name}**\n"
73
  output_text += f"Most Similar Sentence from sentence1: {sentence1_results[model_name]}\n"
74
+ output_text += f"Most Similar Sentence from sentence2: {sentence2_results[model_name]}\n"
75
+ output_text += f"Similarity between retrieved sentences: {similarity_results[model_name]:.4f}\n\n"
76
  return output_text
77
 
78
  def gradio_interface(sentence, model1_name, model2_name, model3_name, model4_name):
79
  """Gradio interface function."""
80
+ sentence1_results, sentence2_results, similarity_results = compare_models_annoy(
81
+ sentence, model1_name, model2_name, model3_name, model4_name
82
+ )
83
+ return format_results(sentence1_results, sentence2_results, similarity_results)
84
 
85
  iface = gr.Interface(
86
  fn=gradio_interface,