Update README.md
README.md CHANGED
@@ -1,41 +1,22 @@
-Compiled:
-```
-sequence_length=32
-auto_cast_type="bf16"
-batch_size=8
-model_dir="/deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
-model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
-
-from huggingface_hub.hf_api import HfFolder
-from huggingface_hub import login
-from optimum.neuron import NeuronModelForCausalLM
-
-login(hf_token,add_to_git_credential=True)
-
+Compiled with:
+```python
 compiler_args = {"num_cores": 16, "auto_cast_type": auto_cast_type}
-input_shapes = {"batch_size":
-model = NeuronModelForCausalLM.from_pretrained(
-model_id,
-export=True,
-**compiler_args,
-**input_shapes)
-model.save_pretrained(model_dir)
-model.push_to_hub(model_dir,repository_id=hf_repo)
-```
-Serve:
+input_shapes = {"batch_size": 8, "sequence_length": 32}
 ```
+
+Usage:
+```python
 import torch
 from optimum.neuron import NeuronModelForCausalLM
 from transformers import AutoTokenizer
 model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
-prompt="
+prompt="What is the capital of France?"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 tokenizer.pad_token_id = tokenizer.eos_token_id if tokenizer.pad_token_id is None else tokenizer.pad_token_id
 inputs = tokenizer(prompt, return_tensors="pt")
 model = NeuronModelForCausalLM.from_pretrained("yahavb/DeepSeek-R1-Distill-Llama-70B-Neuron")
+outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, use_cache=True, temperature=0.7, top_k=50, top_p=0.9)
+outputs = outputs[0, inputs.input_ids.size(-1):]
+response = tokenizer.decode(outputs, skip_special_tokens=True)
+print(response)
+```
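Note that the "Compiled with" block in the new README references `auto_cast_type` without defining it, because this commit removed the export code that set it. For context, here is a cleaned-up sketch of that removed export step, reconstructed from the deleted lines: `hf_token`, `hf_repo`, and the local `model_dir` are placeholders to substitute, and `"bf16"` is inlined from the deleted `auto_cast_type="bf16"` line.

```python
# Sketch of the export step behind the "Compiled with" settings,
# reconstructed from the lines this commit removed.
from huggingface_hub import login
from optimum.neuron import NeuronModelForCausalLM

model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
model_dir = "./DeepSeek-R1-Distill-Llama-70B-Neuron"      # placeholder: local output dir
hf_token = "<your-hf-token>"                              # placeholder: your HF token
hf_repo = "<user>/DeepSeek-R1-Distill-Llama-70B-Neuron"   # placeholder: target Hub repo

login(hf_token, add_to_git_credential=True)

compiler_args = {"num_cores": 16, "auto_cast_type": "bf16"}  # "bf16" inlined from the deleted line
input_shapes = {"batch_size": 8, "sequence_length": 32}

# export=True triggers Neuron compilation with the given cores, dtype, and shapes
model = NeuronModelForCausalLM.from_pretrained(
    model_id,
    export=True,
    **compiler_args,
    **input_shapes,
)
model.save_pretrained(model_dir)
model.push_to_hub(model_dir, repository_id=hf_repo)
```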
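One detail of the usage snippet worth calling out: `generate` returns the prompt tokens followed by the completion, so `outputs[0, inputs.input_ids.size(-1):]` slices off the prompt before decoding, leaving only the model's response in `response`.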