joshnguyen committed on
Commit
be83937
1 Parent(s): 8fb7a30

Update emb method

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -71,7 +71,7 @@ texts = [
71
  "Abstract 2"
72
  ]
73
  inputs = tokenizer(
74
- text_batch,
75
  return_tensors="pt",
76
  return_token_type_ids=False,
77
  padding=True,
@@ -81,6 +81,6 @@ inputs = tokenizer(
81
  inputs.to(model.device)
82
  outputs = model(**inputs, output_hidden_states=True)
83
 
84
- # Last layer of the hidden states. Get the embedding of the first token in each sequence
85
- embeddings = outputs["hidden_states"][-1][:, 0, ...].detach().cpu().numpy()
86
  ```
 
71
  "Abstract 2"
72
  ]
73
  inputs = tokenizer(
74
+ texts,
75
  return_tensors="pt",
76
  return_token_type_ids=False,
77
  padding=True,
 
81
  inputs.to(model.device)
82
  outputs = model(**inputs, output_hidden_states=True)
83
 
84
+ # Last layer of the hidden states. Get average embedding of all tokens
85
+ embeddings = outputs["hidden_states"][-1][:, :, ...].mean(dim=1).detach().cpu().numpy()
86
  ```