milyiyo committed
Commit 5f0ab2a · 1 Parent(s): 32b66e2

Substitute the model and tokenizer used for embeddings

Files changed (1): functions.py (+8 -3)
functions.py CHANGED
@@ -21,6 +21,7 @@ shared = {
     'full_text': None,
 }
 
+
 def get_nearest_examples(question: str, k: int):
     print(['get_nearest_examples', 'start'])
     question_embedding = get_embeddings([question]).cpu().detach().numpy()
@@ -36,12 +37,16 @@ def get_nearest_examples(question: str, k: int):
 
 def get_embeddings(text):
     print(['get_embeddings', 'start'])
-    encoded_input = tokenizer(
-        text, padding=True, truncation=True, return_tensors="pt")
+    encoded_input = emb_tokenizer(text,
+                                  padding=True,
+                                  truncation=True,
+                                  return_tensors="pt")
     encoded_input = {k: v.to('cuda') for k, v in encoded_input.items()}
-    model_output = model(**encoded_input)
+    model_output = emb_model(**encoded_input)
     model_output = model_output.last_hidden_state[:, 0]
+    print(model_output)
     emb_item = model_output.detach().cpu().numpy()[0]
+    print(emb_item)
     print(['get_embeddings', 'end'])
     return emb_item
 
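For context, emb_tokenizer and emb_model are module-level globals whose definitions are not shown in this diff. Below is a minimal, hypothetical sketch of how they might be set up and used the way the updated get_embeddings does (tokenize, move tensors to CUDA, take the [CLS] token of the last hidden state); the checkpoint name and the availability of a GPU are assumptions, not something this commit specifies.

import torch
from transformers import AutoModel, AutoTokenizer

# Assumed checkpoint; the actual model substituted in by this commit may differ.
EMB_CHECKPOINT = "sentence-transformers/multi-qa-mpnet-base-dot-v1"

emb_tokenizer = AutoTokenizer.from_pretrained(EMB_CHECKPOINT)
emb_model = AutoModel.from_pretrained(EMB_CHECKPOINT).to("cuda")
emb_model.eval()

# Embed one question the same way the updated get_embeddings does.
with torch.no_grad():
    encoded = emb_tokenizer(["example question"],
                            padding=True,
                            truncation=True,
                            return_tensors="pt")
    encoded = {k: v.to("cuda") for k, v in encoded.items()}
    cls_embedding = emb_model(**encoded).last_hidden_state[:, 0]

print(cls_embedding.shape)  # e.g. torch.Size([1, 768]) for a base-size encoder

The resulting [CLS] vector is what get_nearest_examples converts to NumPy and presumably passes as the query to the nearest-neighbour lookup in the part of the function not shown in this hunk.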