import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
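
# Llama 2 is a gated model on the Hugging Face Hub, so the access token is
# read from the environment rather than hard-coded.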
hf_token = os.getenv("HF_API_TOKEN")

model_id = "meta-llama/Llama-2-7b-hf"

# `use_auth_token` is deprecated in recent transformers releases; pass `token` instead.
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
# Llama's tokenizer ships without a pad token; reuse the EOS token for padding.
tokenizer.pad_token = tokenizer.eos_token

# Load the model on CPU in full precision; `device_map` requires the
# accelerate package to be installed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cpu",
    torch_dtype=torch.float32,
    token=hf_token,
)

input_text = "Hello, how are you?"
inputs = tokenizer(input_text, return_tensors="pt")
# Pass the attention mask along with the input IDs and set an explicit
# generation length; the default max_length of 20 tokens is easy to hit.
outputs = model.generate(**inputs, max_new_tokens=50)

# Decode the full sequence (prompt plus continuation) back into text.
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)
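
# Example invocation (the script filename is illustrative):
#   export HF_API_TOKEN=hf_...
#   python run_llama.py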