UniverseTBD
/

astrollama

Text Generation

text-generation-inference

Model card Files Files and versions Community

joshnguyen committed on Sep 4, 2023

Commit

be83937

·

1 Parent(s): 8fb7a30

Update emb method

Files changed (1) hide show

README.md +3 -3

README.md CHANGED Viewed

@@ -71,7 +71,7 @@ texts = [
     "Abstract 2"
 ]
 inputs = tokenizer(
-    text_batch,
     return_tensors="pt",
     return_token_type_ids=False,
     padding=True,
@@ -81,6 +81,6 @@ inputs = tokenizer(
 inputs.to(model.device)
 outputs = model(**inputs, output_hidden_states=True)
-# Last layer of the hidden states. Get the embedding of the first token in each sequence
-embeddings = outputs["hidden_states"][-1][:, 0, ...].detach().cpu().numpy()
 ```

     "Abstract 2"
 ]
 inputs = tokenizer(
+    texts,
     return_tensors="pt",
     return_token_type_ids=False,
     padding=True,
 inputs.to(model.device)
 outputs = model(**inputs, output_hidden_states=True)
+# Last layer of the hidden states. Get average embedding of all tokens
+embeddings = outputs["hidden_states"][-1][:, :, ...].mean().detach().cpu().numpy()
 ```