joshnguyen
committed on
Commit
•
be83937
1
Parent(s):
8fb7a30
Update emb method
Browse files
README.md
CHANGED
@@ -71,7 +71,7 @@ texts = [
|
|
71 |
"Abstract 2"
|
72 |
]
|
73 |
inputs = tokenizer(
|
74 |
-
|
75 |
return_tensors="pt",
|
76 |
return_token_type_ids=False,
|
77 |
padding=True,
|
@@ -81,6 +81,6 @@ inputs = tokenizer(
|
|
81 |
inputs.to(model.device)
|
82 |
outputs = model(**inputs, output_hidden_states=True)
|
83 |
|
84 |
-
# Last layer of the hidden states. Get
|
85 |
-
embeddings = outputs["hidden_states"][-1][:,
|
86 |
```
|
|
|
71 |
"Abstract 2"
|
72 |
]
|
73 |
inputs = tokenizer(
|
74 |
+
texts,
|
75 |
return_tensors="pt",
|
76 |
return_token_type_ids=False,
|
77 |
padding=True,
|
|
|
81 |
inputs.to(model.device)
|
82 |
outputs = model(**inputs, output_hidden_states=True)
|
83 |
|
84 |
+
# Last layer of the hidden states. Get average embedding of all tokens
|
85 |
+
embeddings = outputs["hidden_states"][-1][:, :, ...].mean().detach().cpu().numpy()
|
86 |
```
|