import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download


def load_model():
    # Model repo and GGUF file to fetch from the Hugging Face Hub.
    repo_id = "forestav/gguf_lora_model"
    model_file = "unsloth.F16.gguf"

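    # Download the weights (hf_hub_download caches the file locally,
    # so subsequent runs skip the download).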
    local_path = hf_hub_download(
        repo_id=repo_id,
        filename=model_file,
    )

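    # Load the GGUF model with llama-cpp-python.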
    model = Llama(
        model_path=local_path,
        n_ctx=2048,    # context window size in tokens
        n_threads=8,   # CPU threads used for inference
    )

    return model


def generate_response(message, history):
    # Replay earlier turns so the model sees the whole conversation.
    # Note: ChatInterface passes history as (user, assistant) pairs in
    # Gradio's default tuple format; adjust if using type="messages".
    messages = []
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = model.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )

    return response['choices'][0]['message']['content']


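# Load the model once at startup so every request reuses the same instance.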
model = load_model()

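# Chat UI: Gradio calls generate_response(message, history) on each turn.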
demo = gr.ChatInterface(
    fn=generate_response,
    title="Your GGUF Model Chat",
    description="A conversational AI model using GGUF format",
    examples=["Continue the fibonacci sequence: 1, 1, 2, 3, 5, 8,"],
)

if __name__ == "__main__":
    demo.launch()