joshnguyen committed on
Commit
a6f4193
·
1 Parent(s): 3ee1192

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +23 -0
README.md CHANGED
@@ -12,6 +12,7 @@ tags:
12
  - llama-2
13
  - astronomy
14
  - astrophysics
 
15
  ---
16
 
17
  <p><h1>AstroLLaMA</h1></p>
@@ -62,3 +63,25 @@ generated_text = generator(
62
  max_length=512
63
  )
64
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  - llama-2
13
  - astronomy
14
  - astrophysics
15
+ - arxiv
16
  ---
17
 
18
  <p><h1>AstroLLaMA</h1></p>
 
63
  max_length=512
64
  )
65
  ```
66
+
67
+ ## Embedding text with AstroLLaMA
68
+
69
+ ```
70
+ texts = [
71
+ "Abstract 1",
72
+ "Abstract 2"
73
+ ]
74
+ inputs = tokenizer(
75
+ texts,
76
+ return_tensors="pt",
77
+ return_token_type_ids=False,
78
+ padding=True,
79
+ truncation=True,
80
+ max_length=4096
81
+ )
82
+ inputs.to(model.device)
83
+ outputs = model(**inputs, output_hidden_states=True)
84
+
85
+ # Last layer of the hidden states. Get the embedding of the first token in each sequence
86
+ embeddings = outputs["hidden_states"][-1][:, 0, ...].detach().cpu().numpy()
87
+ ```