Spaces:

mrmax14
/

Thera

Runtime error

App Files Community

mrmax14 committed on Jan 26

Commit

7611348

1 Parent(s): a8594bc

new llama model added

Browse files

Files changed (1) hide show

app.py +41 -67

app.py CHANGED Viewed

@@ -1,79 +1,53 @@
 import os
-import torch
-import json
 from transformers import LlamaTokenizer, LlamaForCausalLM
-import gradio as gr
-from huggingface_hub import login
-# Токен для доступу до Hugging Face Hub
-hf_token = os.getenv("HF_API_TOKEN")
-if not hf_token:
-    raise ValueError("HF_API_TOKEN is not set or invalid.")
-# Логін до Hugging Face Hub
-login(token=hf_token)
-# Шлях до локального репозиторію з моделлю
-repo_path = "meta-llama/Llama-2-7b-chat"
-params_path = os.path.join(repo_path, "params.json")
-model_weights_path = os.path.join(repo_path, "consolidated.00.pth")
 config_path = os.path.join(repo_path, "config.json")
-tokenizer_path = repo_path  # Папка, де зберігається `tokenizer.model`
-# Перевірка чи існує каталог, якщо ні - створення
-os.makedirs(repo_path, exist_ok=True)
-# Якщо файл конфігурації не існує, створюємо його
-if not os.path.exists(config_path):
-    print(f"{config_path} not found. Creating custom config.json...")
-    custom_config = {
-        "architectures": ["LlamaForCausalLM"],
-        "model_type": "llama",
-        "hidden_size": 4096,
-        "num_attention_heads": 32,
-        "num_hidden_layers": 32,
-        "vocab_size": 32000,
-        "max_position_embeddings": 2048,
-        "pad_token_id": 0,
-        "bos_token_id": 1,
-        "eos_token_id": 2
-    }
-    with open(config_path, "w") as f:
-        json.dump(custom_config, f, indent=4)
-    print(f"{config_path} created successfully!")
 # Завантаження токенізатора
-tokenizer = LlamaTokenizer.from_pretrained(tokenizer_path)
-# Завантаження стану моделі
-state_dict = torch.load(model_weights_path, map_location=torch.device("cpu"))
-# Завантаження конфігурації з вашого файлу
-with open(config_path, "r") as f:
-    config = json.load(f)
-# Ініціалізація моделі
 model = LlamaForCausalLM.from_pretrained(
-    pretrained_model_name_or_path=None,
-    state_dict=state_dict,
-    config=config
 )
-# Функція для генерації відповіді
-def generate_response(prompt):
-    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
-    outputs = model.generate(inputs.input_ids, max_length=200, temperature=0.7)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
-# Створення інтерфейсу Gradio
-demo = gr.Interface(
-    fn=generate_response,
-    inputs="text",
-    outputs="text",
-    title="LLaMA 2 Chatbot",
-    description="Chatbot based on LLaMA 2 model."
-)
-if __name__ == "__main__":
-    demo.launch()

 import os
 from transformers import LlamaTokenizer, LlamaForCausalLM
+import torch
+# Налаштування шляхів
+repo_path = "meta-llama/Llama-2-7b-chat"  # Локальний шлях до моделі
 config_path = os.path.join(repo_path, "config.json")
+tokenizer_path = os.path.join(repo_path, "tokenizer.model")
+# Перевірка наявності необхідних файлів
+if not os.path.exists(repo_path):
+    raise FileNotFoundError(f"The specified repository path does not exist: {repo_path}")
+required_files = ["config.json", "tokenizer.model", "consolidated.00.pth", "params.json"]
+for file in required_files:
+    if not os.path.exists(os.path.join(repo_path, file)):
+        raise FileNotFoundError(f"Missing required file in {repo_path}: {file}")
 # Завантаження токенізатора
+print("Loading tokenizer...")
+tokenizer = LlamaTokenizer(vocab_file=tokenizer_path)
+print("Tokenizer loaded successfully!")
+# Завантаження моделі
+print("Loading model...")
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = LlamaForCausalLM.from_pretrained(
+    repo_path,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    low_cpu_mem_usage=True,
 )
+model = model.to(device)
+print("Model loaded successfully!")
+# Приклад використання
+input_text = "Привіт! Як ти себе почуваєш сьогодні?"
+inputs = tokenizer(input_text, return_tensors="pt").to(device)
+print("Generating response...")
+with torch.no_grad():
+    outputs = model.generate(
+        inputs["input_ids"],
+        max_length=100,
+        temperature=0.7,
+        top_k=50,
+        top_p=0.9,
+        do_sample=True,
+    )
+response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+print("Response:")
+print(response)