import os

import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

# Hugging Face access token (the meta-llama repos are gated and require one)
hf_token = os.getenv("HF_API_TOKEN")
if not hf_token:
    raise ValueError("HF_API_TOKEN environment variable is not set")

# from_pretrained() cannot load Meta's raw consolidated.00.pth checkpoint directly.
# Point model_path at a directory of weights converted to the Transformers format
# (e.g. with the convert_llama_weights_to_hf.py script that ships with transformers);
# config.json is then read from that directory, so no separate config argument is needed.
model_path = "path_to_converted_model"

# "meta-llama/Llama-2-7b-chat-hf" is the Transformers-format repo for this model
tokenizer = LlamaTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=hf_token)

model = LlamaForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float32,
    device_map="auto",  # requires the accelerate package
)

input_text = "Hello, how can I assist you today?"
# Move the tokenized prompt to the device the model was dispatched to
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

output = model.generate(
    **inputs,  # passes input_ids together with the attention_mask
    max_length=50,
    num_return_sequences=1,
    do_sample=True,
    temperature=0.7,
)

decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
print(f"\nModel response: {decoded_output}")
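
# Optional extension (a minimal sketch, not part of the original snippet): the
# Llama-2 chat checkpoints were tuned on the [INST] ... [/INST] prompt template,
# so wrapping the user message in that format usually yields better replies.
# The system prompt text below is only an illustrative placeholder.
chat_prompt = (
    "[INST] <<SYS>>\nYou are a helpful assistant.\n<</SYS>>\n\n"
    "Hello, how can I assist you today? [/INST]"
)
chat_inputs = tokenizer(chat_prompt, return_tensors="pt").to(model.device)
chat_output = model.generate(**chat_inputs, max_new_tokens=128, do_sample=True, temperature=0.7)
print(tokenizer.decode(chat_output[0], skip_special_tokens=True))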