|
import gradio as gr |
|
from llama_cpp import Llama |
|
|
|
|
|
# Path to the locally stored Qwen2.5 0.5B instruct model, quantized to Q5_K_M GGUF.
MODEL_PATH = "model/qwen2.5-0.5b-instruct-q5_k_m.gguf"



# Load the model once at import time; the single instance is reused for
# every chat request handled by the Gradio app below.
llm = Llama(

    model_path=MODEL_PATH,

    n_ctx=2048,  # context window size in tokens

    n_gpu_layers=0,  # CPU-only inference: no layers offloaded to a GPU

)
|
|
|
def chat(user_input):
    """Generate a single-turn reply from the local GGUF model.

    Args:
        user_input: The user's message as plain text.

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    prompt = f"User: {user_input}\nAssistant: "

    output = llm(
        prompt=prompt,
        temperature=0.7,
        top_p=0.9,
        max_tokens=100,
        # Bug fix: without stop sequences the model keeps generating
        # additional "User:"/"Assistant:" turns past its own answer,
        # leaking an imaginary conversation into the reply. Stop as soon
        # as it starts a new turn.
        stop=["User:", "\nUser"],
    )

    response_text = output['choices'][0]['text'].strip()

    return response_text
|
|
|
|
|
# Minimal text-in/text-out web UI wrapping `chat`. Note this is stateless:
# each submission is an independent single-turn exchange (no chat history).
iface = gr.Interface(

    fn=chat,  # invoked once per submitted message

    inputs="text",  # shorthand for a single Textbox input component

    outputs="text",  # plain-text output component

    title="Qwen Chatbot",

    description="与 Qwen2.5-0.5B-Instruct-GGUF 模型对话"

)



# Start the local Gradio server; blocks the script until interrupted.
iface.launch()
|
|