yahavb commited on
Commit
ffc1431
·
verified ·
1 Parent(s): d099e08

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -31
README.md CHANGED
@@ -1,41 +1,22 @@
1
- Compiled:
2
- ```python
3
- sequence_length=32
4
- auto_cast_type="bf16"
5
- batch_size=8
6
- model_dir="/deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
7
- model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
8
-
9
- from huggingface_hub.hf_api import HfFolder
10
- from huggingface_hub import login
11
- from optimum.neuron import NeuronModelForCausalLM
12
-
13
- login(hf_token,add_to_git_credential=True)
14
-
15
  compiler_args = {"num_cores": 16, "auto_cast_type": auto_cast_type}
16
- input_shapes = {"batch_size": batch_size, "sequence_length": sequence_length}
17
- model = NeuronModelForCausalLM.from_pretrained(
18
- model_id,
19
- export=True,
20
- **compiler_args,
21
- **input_shapes)
22
- model.save_pretrained(model_dir)
23
- model.push_to_hub(model_dir,repository_id=hf_repo)
24
- ```
25
- Serve:
26
  ```
 
 
 
27
  import torch
28
  from optimum.neuron import NeuronModelForCausalLM
29
  from transformers import AutoTokenizer
30
  model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
31
- prompt="Who are you? what is the model that powers you?"
32
  tokenizer = AutoTokenizer.from_pretrained(model_id)
33
  tokenizer.pad_token_id = tokenizer.eos_token_id if tokenizer.pad_token_id is None else tokenizer.pad_token_id
34
  inputs = tokenizer(prompt, return_tensors="pt")
35
  model = NeuronModelForCausalLM.from_pretrained("yahavb/DeepSeek-R1-Distill-Llama-70B-Neuron")
36
- for i in range(10):
37
- outputs = model.generate(**inputs,max_new_tokens=512,do_sample=True,use_cache=True,temperature=0.7,top_k=50,top_p=0.9)
38
- outputs=outputs[0, inputs.input_ids.size(-1):]
39
- response=tokenizer.decode(outputs, skip_special_tokens=True)
40
- print(response)
41
- ```
 
1
+ Compiled with:
2
+ ```python
 
 
 
 
 
 
 
 
 
 
 
 
3
  compiler_args = {"num_cores": 16, "auto_cast_type": auto_cast_type}
4
+ input_shapes = {"batch_size": 8, "sequence_length": 32}
 
 
 
 
 
 
 
 
 
5
  ```
6
+
7
+ Usage:
8
+ ```python
9
  import torch
10
  from optimum.neuron import NeuronModelForCausalLM
11
  from transformers import AutoTokenizer
12
  model_id="deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
13
+ prompt="What is the capital of France?"
14
  tokenizer = AutoTokenizer.from_pretrained(model_id)
15
  tokenizer.pad_token_id = tokenizer.eos_token_id if tokenizer.pad_token_id is None else tokenizer.pad_token_id
16
  inputs = tokenizer(prompt, return_tensors="pt")
17
  model = NeuronModelForCausalLM.from_pretrained("yahavb/DeepSeek-R1-Distill-Llama-70B-Neuron")
18
+ outputs = model.generate(**inputs,max_new_tokens=512,do_sample=True,use_cache=True,temperature=0.7,top_k=50,top_p=0.9)
19
+ outputs=outputs[0, inputs.input_ids.size(-1):]
20
+ response=tokenizer.decode(outputs, skip_special_tokens=True)
21
+ print(response)
22
+