import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Read the Hugging Face access token from the environment and fail fast if it is missing.
hf_token = os.getenv("HF_API_TOKEN")
if not hf_token:
    raise ValueError("HF_API_TOKEN environment variable is not set")
model_name = "meta-llama/Llama-2-7b-chat" |
|
|
|
|
|
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_token) |
|
|
|
|
|

# Load the model on CPU in full precision (float32).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_token,
    torch_dtype=torch.float32,
    device_map=None,
)
input_text = "Hello, how can I assist you today?" |
|
inputs = tokenizer(input_text, return_tensors="pt") |
|
|
|
|
|

# Sample one completion; note that max_length counts the prompt tokens as well as the new ones.
output = model.generate(
    inputs["input_ids"],
    max_length=50,
    num_return_sequences=1,
    do_sample=True,
    temperature=0.7,
)

# Decode the generated ids back to text, skipping special tokens.
decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
print(f"\nModel response: {decoded_output}")