jgrosjean committed on
Commit
4b9a203
·
verified ·
1 Parent(s): 66b1cb4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -22
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
  import torch
3
-
4
  from transformers import AutoModel, AutoTokenizer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
 
@@ -10,8 +9,6 @@ model = AutoModel.from_pretrained(model_name)
10
  tokenizer = AutoTokenizer.from_pretrained(model_name)
11
 
12
  def generate_sentence_embedding(sentence, language):
13
-
14
- # Set adapter to specified language
15
  if "de" in language:
16
  model.set_default_language("de_CH")
17
  if "fr" in language:
@@ -20,15 +17,9 @@ def generate_sentence_embedding(sentence, language):
20
  model.set_default_language("it_CH")
21
  if "rm" in language:
22
  model.set_default_language("rm_CH")
23
-
24
- # Tokenize input sentence
25
  inputs = tokenizer(sentence, padding=True, truncation=True, return_tensors="pt", max_length=512)
26
-
27
- # Take tokenized input and pass it through the model
28
  with torch.no_grad():
29
  outputs = model(**inputs)
30
-
31
- # Extract sentence embeddings via mean pooling
32
  token_embeddings = outputs.last_hidden_state
33
  attention_mask = inputs['attention_mask'].unsqueeze(-1).expand(token_embeddings.size()).float()
34
  sum_embeddings = torch.sum(token_embeddings * attention_mask, 1)
@@ -37,16 +28,13 @@ def generate_sentence_embedding(sentence, language):
37
  return embedding
38
 
39
  def calculate_cosine_similarities(source_sentence, source_language, target_sentence_1, target_language_1, target_sentence_2, target_language_2, target_sentence_3, target_language_3):
40
-
41
  source_embedding = generate_sentence_embedding(source_sentence, source_language)
42
  target_embedding_1 = generate_sentence_embedding(target_sentence_1, target_language_1)
43
  target_embedding_2 = generate_sentence_embedding(target_sentence_2, target_language_2)
44
  target_embedding_3 = generate_sentence_embedding(target_sentence_3, target_language_3)
45
-
46
  cosine_score_1 = cosine_similarity(source_embedding, target_embedding_1)
47
  cosine_score_2 = cosine_similarity(source_embedding, target_embedding_2)
48
  cosine_score_3 = cosine_similarity(source_embedding, target_embedding_3)
49
-
50
  cosine_scores = {
51
  target_sentence_1: cosine_score_1[0][0],
52
  target_sentence_2: cosine_score_2[0][0],
@@ -63,18 +51,23 @@ def main():
63
  demo = gr.Interface(
64
  fn=calculate_cosine_similarities,
65
  inputs=[
66
- gr.Textbox(lines=1, placeholder="Der Zug fährt um 9 Uhr in Zürich ab.", label="source sentence"),
67
- gr.Dropdown(["de", "fr", "it", "rm"], value="de", label="language"),
68
- gr.Textbox(lines=1, placeholder="Le train arrive à Lausanne à 11 heures.", label="target sentence 1"),
69
- gr.Dropdown(["de", "fr", "it", "rm"], value="fr", label="language"),
70
- gr.Textbox(lines=1, placeholder="Alla stazione di Lugano ci sono diversi binari.", label="target sentence 2"),
71
- gr.Dropdown(["de", "fr", "it", "rm"], value="it", label="language"),
72
- gr.Textbox(lines=1, placeholder="A Cuera van biars trens ellas muntognas.", label="target sentence 3"),
73
- gr.Dropdown(["de", "fr", "it", "rm"], value="rm", label="language")
74
  ],
75
- outputs= gr.Textbox(label="Cosine similarity scores", type="text", lines=3)
 
 
 
 
 
76
  )
77
  demo.launch(share=True)
78
 
79
  if __name__ == "__main__":
80
- main()
 
1
  import gradio as gr
2
  import torch
 
3
  from transformers import AutoModel, AutoTokenizer
4
  from sklearn.metrics.pairwise import cosine_similarity
5
 
 
9
  tokenizer = AutoTokenizer.from_pretrained(model_name)
10
 
11
  def generate_sentence_embedding(sentence, language):
 
 
12
  if "de" in language:
13
  model.set_default_language("de_CH")
14
  if "fr" in language:
 
17
  model.set_default_language("it_CH")
18
  if "rm" in language:
19
  model.set_default_language("rm_CH")
 
 
20
  inputs = tokenizer(sentence, padding=True, truncation=True, return_tensors="pt", max_length=512)
 
 
21
  with torch.no_grad():
22
  outputs = model(**inputs)
 
 
23
  token_embeddings = outputs.last_hidden_state
24
  attention_mask = inputs['attention_mask'].unsqueeze(-1).expand(token_embeddings.size()).float()
25
  sum_embeddings = torch.sum(token_embeddings * attention_mask, 1)
 
28
  return embedding
29
 
30
  def calculate_cosine_similarities(source_sentence, source_language, target_sentence_1, target_language_1, target_sentence_2, target_language_2, target_sentence_3, target_language_3):
 
31
  source_embedding = generate_sentence_embedding(source_sentence, source_language)
32
  target_embedding_1 = generate_sentence_embedding(target_sentence_1, target_language_1)
33
  target_embedding_2 = generate_sentence_embedding(target_sentence_2, target_language_2)
34
  target_embedding_3 = generate_sentence_embedding(target_sentence_3, target_language_3)
 
35
  cosine_score_1 = cosine_similarity(source_embedding, target_embedding_1)
36
  cosine_score_2 = cosine_similarity(source_embedding, target_embedding_2)
37
  cosine_score_3 = cosine_similarity(source_embedding, target_embedding_3)
 
38
  cosine_scores = {
39
  target_sentence_1: cosine_score_1[0][0],
40
  target_sentence_2: cosine_score_2[0][0],
 
51
  demo = gr.Interface(
52
  fn=calculate_cosine_similarities,
53
  inputs=[
54
+ gr.Textbox(lines=1, placeholder="Enter source sentence", label="Source Sentence"),
55
+ gr.Dropdown(["de", "fr", "it", "rm"], label="Source Language"),
56
+ gr.Textbox(lines=1, placeholder="Enter target sentence 1", label="Target Sentence 1"),
57
+ gr.Dropdown(["de", "fr", "it", "rm"], label="Target Language 1"),
58
+ gr.Textbox(lines=1, placeholder="Enter target sentence 2", label="Target Sentence 2"),
59
+ gr.Dropdown(["de", "fr", "it", "rm"], label="Target Language 2"),
60
+ gr.Textbox(lines=1, placeholder="Enter target sentence 3", label="Target Sentence 3"),
61
+ gr.Dropdown(["de", "fr", "it", "rm"], label="Target Language 3")
62
  ],
63
+ outputs= gr.Textbox(label="Cosine Similarity Scores", type="text", lines=3),
64
+ title="Sentence Similarity Calculator",
65
+ description="Enter a source sentence and up to three target sentences to calculate their cosine similarity.",
66
+ examples=[
67
+ ["Der Zug fährt um 9 Uhr in Zürich ab.", "de", "Le train arrive à Lausanne à 11 heures.", "fr", "Alla stazione di Lugano ci sono diversi binari.", "it", "A Cuera van biars trens ellas muntognas.", "rm"]
68
+ ]
69
  )
70
  demo.launch(share=True)
71
 
72
  if __name__ == "__main__":
73
+ main()