"""Print several sampled completions of one prompt from a causal LM on Neuron.

The tokenizer is loaded from ``model_id`` and the model from
``neuron_model_id`` through ``optimum.neuron.NeuronModelForCausalLM``.
The prompt is tokenized once; each loop iteration then runs a sampled
``generate`` call and prints only the newly generated text.

NOTE(review): the repository name suggests ``neuron_model_id`` holds a
Neuron-compiled export of ``model_id`` -- confirm before swapping either id.
"""
# NOTE(review): torch is not referenced by name below; presumably kept because
# return_tensors="pt" makes the tokenizer produce torch tensors.
import torch

from optimum.neuron import NeuronModelForCausalLM

from transformers import AutoTokenizer

model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-70B"
neuron_model_id = "yahavb/DeepSeek-R1-Distill-Llama-70B-Neuron"
prompt = "Who are you? what is the model that powers you?"
num_samples = 10

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Fall back to the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

inputs = tokenizer(prompt, return_tensors="pt")
# Loop-invariant: number of prompt positions along the last axis of input_ids.
prompt_length = inputs.input_ids.size(-1)

model = NeuronModelForCausalLM.from_pretrained(neuron_model_id)

for _ in range(num_samples):
    generated = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        use_cache=True,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
    )
    # Keep row 0 only and skip the first prompt_length positions, so the
    # decoded text excludes the prompt itself.
    outputs = generated[0, prompt_length:]
    response = tokenizer.decode(outputs, skip_special_tokens=True)
    print(response)