Spaces:
Sleeping
Sleeping
File size: 1,007 Bytes
3bad752 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
import os
import requests
from llama_cpp import Llama
# Local path where the GGUF weights are cached, and the remote file they are
# fetched from when the cache is empty.
MODEL_PATH = "models/llama3.2_3B_Q4.gguf"
MODEL_URL = "https://huggingface.co/prithivMLmods/Llama-3.2-3B-GGUF/resolve/main/Llama-3.2-3B-GGUF.Q4_K_M.gguf?download=true"

# Download the model only when it is not already present locally.
if not os.path.exists(MODEL_PATH):
    print("Downloading model...")

    # open() does not create missing parent directories, so make sure the
    # target directory exists before writing into it.
    model_dir = os.path.dirname(MODEL_PATH)
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)

    # Stream into a temporary sibling file and rename it into place only after
    # the whole body has been written. An interrupted download therefore never
    # leaves a truncated file at MODEL_PATH that the existence check above
    # would mistake for a complete model on the next run.
    partial_path = MODEL_PATH + ".part"
    try:
        # timeout=(connect, read): a stalled connection raises instead of
        # blocking the script forever.
        with requests.get(MODEL_URL, stream=True, timeout=(10, 60)) as r:
            # Fail on HTTP error statuses rather than saving an error page.
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(partial_path, MODEL_PATH)
    finally:
        # After a successful rename the partial file is gone and this is a
        # no-op; on any failure it removes the incomplete download.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    print("Download complete!")
# Load the model from the local GGUF file; n_ctx=4096 is passed through to
# llama-cpp-python as the context size.
llm = Llama(model_path=MODEL_PATH, n_ctx=4096)

# Define system and user prompts. The user prompt is read interactively from
# standard input, so this script handles exactly one question per run.
system_prompt = "You are a helpful AI assistant."
user_prompt = input("User: ")

# Generate a single chat completion from the system + user message pair.
output = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
)

# Print the reply text, read from the first entry of output["choices"].
print("Assistant:", output["choices"][0]["message"]["content"])