Tonic committed on
Commit
cf71836
·
verified ·
1 Parent(s): 78a49c2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -11
app.py CHANGED
@@ -51,29 +51,29 @@ class EmbeddingModel:
51
  self.model = AutoModel.from_pretrained('intfloat/e5-mistral-7b-instruct', torch_dtype=torch.float16, device_map=device)
52
 
53
  @spaces.GPU
54
- def compute_embeddings(selected_task, input_text):
55
  max_length = 2042
56
  task_description = tasks[selected_task]
57
  processed_texts = [f'Instruct: {task_description}\nQuery: {input_text}']
58
 
59
- batch_dict = tokenizer(processed_texts, max_length=max_length - 1, return_attention_mask=False, padding=False, truncation=True)
60
- batch_dict['input_ids'] = [input_ids + [tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
61
- batch_dict = tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
62
  batch_dict = {k: v.to(device) for k, v in batch_dict.items()}
63
- outputs = model(**batch_dict)
64
  embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
65
  embeddings = F.normalize(embeddings, p=2, dim=1)
66
  embeddings_list = embeddings.detach().cpu().numpy().tolist()
67
  return embeddings_list
68
 
69
  @spaces.GPU
70
- def compute_similarity(self, sentence1, sentence2, extra_sentence1, extra_sentence2):
71
 
72
  # Compute embeddings for each sentence
73
- embeddings1 = compute_embeddings(self.selected_task, sentence1)
74
- embeddings2 = compute_embeddings(self.selected_task, sentence2)
75
- embeddings3 = compute_embeddings(self.selected_task, extra_sentence1)
76
- embeddings4 = compute_embeddings(self.selected_task, extra_sentence2)
77
 
78
  # Convert embeddings to tensors
79
  embeddings_tensor1 = torch.tensor(embeddings1).to(device)
@@ -89,6 +89,7 @@ class EmbeddingModel:
89
 
90
 
91
  def app_interface():
 
92
  with gr.Blocks() as demo:
93
  gr.Markdown(title)
94
  gr.Markdown(description)
@@ -114,7 +115,7 @@ def app_interface():
114
  similarity_output = gr.Label(label="🐣e5-mistral🛌🏻 Similarity Scores")
115
  similarity_button.click(
116
  fn=EmbeddingModel.compute_similarity,
117
- inputs=[sentence1_box, sentence2_box, extra_sentence1_box, extra_sentence2_box],
118
  outputs=similarity_output
119
  )
120
 
 
51
  self.model = AutoModel.from_pretrained('intfloat/e5-mistral-7b-instruct', torch_dtype=torch.float16, device_map=device)
52
 
53
  @spaces.GPU
54
+ def compute_embeddings(self, selected_task, input_text):
55
  max_length = 2042
56
  task_description = tasks[selected_task]
57
  processed_texts = [f'Instruct: {task_description}\nQuery: {input_text}']
58
 
59
+ batch_dict = self.tokenizer(processed_texts, max_length=max_length - 1, return_attention_mask=False, padding=False, truncation=True)
60
+ batch_dict['input_ids'] = [input_ids + [self.tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
61
+ batch_dict = self.tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt')
62
  batch_dict = {k: v.to(device) for k, v in batch_dict.items()}
63
+ outputs = self.model(**batch_dict)
64
  embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
65
  embeddings = F.normalize(embeddings, p=2, dim=1)
66
  embeddings_list = embeddings.detach().cpu().numpy().tolist()
67
  return embeddings_list
68
 
69
  @spaces.GPU
70
+ def compute_similarity(self, selected_task, sentence1, sentence2, extra_sentence1, extra_sentence2):
71
 
72
  # Compute embeddings for each sentence
73
+ embeddings1 = self.compute_embeddings(self.selected_task, sentence1)
74
+ embeddings2 = self.compute_embeddings(self.selected_task, sentence2)
75
+ embeddings3 = self.compute_embeddings(self.selected_task, extra_sentence1)
76
+ embeddings4 = self.compute_embeddings(self.selected_task, extra_sentence2)
77
 
78
  # Convert embeddings to tensors
79
  embeddings_tensor1 = torch.tensor(embeddings1).to(device)
 
89
 
90
 
91
  def app_interface():
92
+ # embedding_model = EmbeddingModel()
93
  with gr.Blocks() as demo:
94
  gr.Markdown(title)
95
  gr.Markdown(description)
 
115
  similarity_output = gr.Label(label="🐣e5-mistral🛌🏻 Similarity Scores")
116
  similarity_button.click(
117
  fn=EmbeddingModel.compute_similarity,
118
+ inputs=[task_dropdown, sentence1_box, sentence2_box, extra_sentence1_box, extra_sentence2_box],
119
  outputs=similarity_output
120
  )
121