import os

import torch
from transformers import LlamaForCausalLM, LlamaTokenizer
|
# Local directory with the Llama 2 7B Chat files. Note that from_pretrained()
# below expects the weights in the Hugging Face format (as published in
# meta-llama/Llama-2-7b-chat-hf); a raw Meta checkpoint
# (consolidated.00.pth + params.json) must be converted first, e.g. with
# transformers' convert_llama_weights_to_hf.py script.
repo_path = "meta-llama/Llama-2-7b-chat"
tokenizer_path = os.path.join(repo_path, "tokenizer.model")
|
if not os.path.exists(repo_path):
    raise FileNotFoundError(f"The specified repository path does not exist: {repo_path}")

# Check for the files the loading code below actually needs (the raw Meta
# checkpoint files consolidated.00.pth / params.json are not used by
# transformers' from_pretrained()).
required_files = ["config.json", "tokenizer.model"]
for file in required_files:
    if not os.path.exists(os.path.join(repo_path, file)):
        raise FileNotFoundError(f"Missing required file in {repo_path}: {file}")
|
print("Loading tokenizer...")
# LlamaTokenizer is SentencePiece-based, so it can be built directly from the
# tokenizer.model vocab file (requires the sentencepiece package).
tokenizer = LlamaTokenizer(vocab_file=tokenizer_path)
print("Tokenizer loaded successfully!")
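
# An equivalent alternative sketch (assumes the directory also holds the
# tokenizer files in the usual Hugging Face layout), which also picks up any
# special-token configuration:
#
#     tokenizer = LlamaTokenizer.from_pretrained(repo_path)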
|
print("Loading model...")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# fp16 roughly halves memory use on GPU; fall back to fp32 on CPU, where half
# precision is poorly supported. low_cpu_mem_usage avoids materialising a
# second full copy of the weights in RAM while loading.
model = LlamaForCausalLM.from_pretrained(
    repo_path,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    low_cpu_mem_usage=True,
)
model = model.to(device)
model.eval()  # inference only: disables dropout and other training-mode behaviour
print("Model loaded successfully!")
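
# Optional alternative sketch (assumes the accelerate package is installed):
# from_pretrained can place the weights across available devices automatically,
# which makes the manual .to(device) call above unnecessary:
#
#     model = LlamaForCausalLM.from_pretrained(
#         repo_path,
#         torch_dtype=torch.float16,
#         device_map="auto",
#     )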
|
input_text = "Hello! How are you feeling today?"
inputs = tokenizer(input_text, return_tensors="pt").to(device)
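
# Tip: Llama-2-chat models were fine-tuned on the [INST] ... [/INST] prompt
# template, so wrapping the prompt accordingly usually yields better
# chat-style answers, e.g.:
#
#     input_text = "[INST] Hello! How are you feeling today? [/INST]"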
|
print("Generating response...")
with torch.no_grad():
    outputs = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=100,  # total length in tokens, prompt included
        temperature=0.7,  # <1 sharpens the next-token distribution slightly
        top_k=50,  # consider only the 50 most likely next tokens
        top_p=0.9,  # nucleus sampling: keep the top 90% of probability mass
        do_sample=True,  # sample; the three knobs above have no effect otherwise
        pad_token_id=tokenizer.eos_token_id,  # Llama defines no pad token
    )
|
# The generated sequence includes the prompt tokens; decode the whole thing.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Response:")
print(response)
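
# To print only the model's reply without echoing the prompt, slice off the
# prompt tokens before decoding (a sketch using the variables defined above):
#
#     new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
#     print(tokenizer.decode(new_tokens, skip_special_tokens=True))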