File size: 1,164 Bytes
4ade08c d5c72cc 4ade08c d5c72cc 4ade08c d5c72cc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
def load_model(
    repo_id="forestav/gguf_lora_model",
    model_file="unsloth.F16.gguf",
    n_ctx=2048,
    n_threads=8,
):
    """Download a GGUF model from the Hugging Face Hub and load it with llama.cpp.

    Args:
        repo_id: Hub repository to fetch from (defaults to the original hard-coded repo).
        model_file: GGUF filename inside the repo.
        n_ctx: Context window size in tokens.
        n_threads: CPU threads for inference.

    Returns:
        A ready-to-use ``llama_cpp.Llama`` instance.
    """
    # hf_hub_download caches locally, so repeated launches skip the download.
    local_path = hf_hub_download(
        repo_id=repo_id,
        filename=model_file,
    )
    # Initialize the llama.cpp backend on the downloaded weights.
    model = Llama(
        model_path=local_path,
        n_ctx=n_ctx,
        n_threads=n_threads,
    )
    return model
def generate_response(message, history):
    """Generate a chat reply, including prior conversation turns.

    Fixes a defect in the original: ``history`` was accepted but ignored, so
    the model never saw earlier turns and could not hold a conversation.

    Args:
        message: The new user message.
        history: Gradio chat history — either a list of (user, assistant)
            pairs (legacy tuple format) or a list of {"role", "content"}
            dicts (``type="messages"`` format).

    Returns:
        The assistant's reply text.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Gradio "messages" format: already role/content dicts.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Legacy tuple format: (user_text, assistant_text).
            user_text, assistant_text = turn
            messages.append({"role": "user", "content": user_text})
            if assistant_text is not None:
                messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    response = model.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )
    return response['choices'][0]['message']['content']
# Load model globally once at import time so every chat request reuses it
# (NOTE(review): this downloads the model on first run — startup may be slow).
model = load_model()

# Create Gradio interface with updated parameters.
# ChatInterface wires generate_response(message, history) to a chat UI.
demo = gr.ChatInterface(
    fn=generate_response,
    title="Your GGUF Model Chat",
    description="A conversational AI model using GGUF format",
    examples=["Continue the fibonacci sequence: 1, 1, 2, 3, 5, 8,"]
)

# Only start the web server when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()