joshnguyen committed
Commit a6f4193 · Parent(s): 3ee1192
Update README.md

README.md CHANGED
@@ -12,6 +12,7 @@ tags:
 - llama-2
 - astronomy
 - astrophysics
+- arxiv
 ---
 
 <p><h1>AstroLLaMA</h1></p>
@@ -62,3 +63,25 @@ generated_text = generator(
     max_length=512
 )
 ```
+
+## Embedding text with AstroLLaMA
+
+```
+texts = [
+    "Abstract 1",
+    "Abstract 2"
+]
+inputs = tokenizer(
+    texts,
+    return_tensors="pt",
+    return_token_type_ids=False,
+    padding=True,
+    truncation=True,
+    max_length=4096
+)
+inputs.to(model.device)
+outputs = model(**inputs, output_hidden_states=True)
+
+# Last layer of the hidden states. Get the embedding of the first token in each sequence
+embeddings = outputs["hidden_states"][-1][:, 0, ...].detach().cpu().numpy()
+```
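As a usage sketch: once `embeddings` has been computed as in the snippet above, the abstract vectors can be compared directly, for example with cosine similarity. The block below is a minimal illustration, not part of the committed README; it assumes only that `embeddings` exists as above and that NumPy is installed.

```
import numpy as np

# `embeddings` has shape (num_texts, hidden_size): one vector per input abstract.
# Hypothetical example: cosine similarity between the first two abstracts.
a, b = embeddings[0], embeddings[1]
cosine_similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
print(f"Cosine similarity between the two abstracts: {cosine_similarity:.4f}")
```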