"""Download a GGUF Llama model if needed and run a single chat turn with it."""

import os

import requests
from llama_cpp import Llama

# Where the model lives on disk and where to fetch it from when missing.
MODEL_PATH = "models/llama3.2_3B_Q4.gguf"
MODEL_URL = "https://huggingface.co/prithivMLmods/Llama-3.2-3B-GGUF/resolve/main/Llama-3.2-3B-GGUF.Q4_K_M.gguf?download=true"

# Number of bytes requested per streamed chunk while downloading.
DOWNLOAD_CHUNK_SIZE = 8192
# (connect, read) timeouts in seconds, so a stalled connection fails
# instead of hanging forever.
DOWNLOAD_TIMEOUT = (10, 60)

SYSTEM_PROMPT = "You are a helpful AI assistant."


def download_model(url=MODEL_URL, path=MODEL_PATH):
    """Stream the file at *url* to *path*.

    The data is first written to ``<path>.part`` and only renamed to *path*
    once the transfer has finished, so an interrupted download never leaves
    a truncated file that a later run would mistake for a complete model.

    Args:
        url: HTTP(S) location of the model file.
        path: Destination path on disk.

    Raises:
        requests.RequestException: On connection failures, timeouts, or a
            non-success HTTP status.
        OSError: If the destination cannot be created or written.
    """
    directory = os.path.dirname(path)
    if directory:
        # open() does not create missing parent directories.
        os.makedirs(directory, exist_ok=True)

    partial_path = f"{path}.part"
    try:
        with requests.get(url, stream=True, timeout=DOWNLOAD_TIMEOUT) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as model_file:
                for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                    model_file.write(chunk)
        # Publish the finished file in one step.
        os.replace(partial_path, path)
    finally:
        # After a successful rename the partial file no longer exists; on
        # any failure (including Ctrl-C) remove the incomplete leftovers.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def ensure_model(path=MODEL_PATH, url=MODEL_URL):
    """Download the model to *path* unless a file is already present there."""
    if os.path.exists(path):
        return
    print("Downloading model...")
    download_model(url, path)
    print("Download complete!")


def main():
    """Fetch the model if necessary, read one user prompt, and print the reply."""
    ensure_model()

    # Load model
    llm = Llama(model_path=MODEL_PATH, n_ctx=4096)

    user_prompt = input("User: ")

    # Generate response
    output = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ]
    )

    # Print response
    print("Assistant:", output["choices"][0]["message"]["content"])


if __name__ == "__main__":
    main()