import os

import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
|
# Local directory with the Llama 2 7B Chat files. Note that from_pretrained()
# below expects the weights in the Hugging Face format (as published in
# meta-llama/Llama-2-7b-chat-hf); a raw Meta checkpoint
# (consolidated.00.pth + params.json) must be converted first, e.g. with
# transformers' convert_llama_weights_to_hf.py script.
repo_path = "meta-llama/Llama-2-7b-chat"
tokenizer_path = os.path.join(repo_path, "tokenizer.model")
|
if not os.path.exists(repo_path):
    raise FileNotFoundError(f"The specified repository path does not exist: {repo_path}")

# Check for the files the loading code below actually needs (the raw Meta
# checkpoint files consolidated.00.pth / params.json are not used by
# transformers' from_pretrained()).
required_files = ["config.json", "tokenizer.model"]
for file in required_files:
    if not os.path.exists(os.path.join(repo_path, file)):
        raise FileNotFoundError(f"Missing required file in {repo_path}: {file}")
|
print("Loading tokenizer...")
# LlamaTokenizer is SentencePiece-based, so it can be built directly from the
# tokenizer.model vocab file (requires the sentencepiece package).
tokenizer = LlamaTokenizer(vocab_file=tokenizer_path)
print("Tokenizer loaded successfully!")
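
# An equivalent alternative sketch (assumes the directory also holds the
# tokenizer files in the usual Hugging Face layout), which also picks up any
# special-token configuration:
#
#     tokenizer = LlamaTokenizer.from_pretrained(repo_path)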
|
print("Loading model...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# fp16 roughly halves memory use on GPU; fall back to fp32 on CPU, where half
# precision is poorly supported. low_cpu_mem_usage avoids materialising a
# second full copy of the weights in RAM while loading.
model = LlamaForCausalLM.from_pretrained(
    repo_path,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    low_cpu_mem_usage=True,
)
model = model.to(device)
model.eval()  # inference only: disables dropout and other training-mode behaviour
print("Model loaded successfully!")
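
# Optional alternative sketch (assumes the accelerate package is installed):
# from_pretrained can place the weights across available devices automatically,
# which makes the manual .to(device) call above unnecessary:
#
#     model = LlamaForCausalLM.from_pretrained(
#         repo_path,
#         torch_dtype=torch.float16,
#         device_map="auto",
#     )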
|
input_text = "Hello! How are you feeling today?"
inputs = tokenizer(input_text, return_tensors="pt").to(device)
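
# Tip: Llama-2-chat models were fine-tuned on the [INST] ... [/INST] prompt
# template, so wrapping the prompt accordingly usually yields better
# chat-style answers, e.g.:
#
#     input_text = "[INST] Hello! How are you feeling today? [/INST]"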
|
print("Generating response...")
with torch.no_grad():
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=100,  # total length in tokens, prompt included
        temperature=0.7,  # <1 sharpens the next-token distribution slightly
        top_k=50,  # consider only the 50 most likely next tokens
        top_p=0.9,  # nucleus sampling: keep the top 90% of probability mass
        do_sample=True,  # sample; the three knobs above have no effect otherwise
        pad_token_id=tokenizer.eos_token_id,  # Llama defines no pad token
    )
|
# The generated sequence includes the prompt tokens; decode the whole thing.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Response:")
print(response)
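
# To print only the model's reply without echoing the prompt, slice off the
# prompt tokens before decoding (a sketch using the variables defined above):
#
#     new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
#     print(tokenizer.decode(new_tokens, skip_special_tokens=True))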