import gradio as gr
from llama_cpp import Llama

# Initialize the model
MODEL_PATH = "model/qwen2.5-0.5b-instruct-q5_k_m.gguf"

llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,       # context window size
    n_gpu_layers=0,   # run entirely on CPU
)
def chat(user_input):
    # Build a plain-text prompt and generate a completion
    prompt = f"User: {user_input}\nAssistant: "
    output = llm(
        prompt=prompt,
        temperature=0.7,
        top_p=0.9,
        max_tokens=100,
    )
    response_text = output['choices'][0]['text'].strip()
    return response_text
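
# Optional alternative (a minimal sketch): llama-cpp-python also provides
# create_chat_completion(), which applies the chat template bundled in the
# GGUF file instead of the hand-written "User:/Assistant:" prompt above.
# This function is illustrative only and is not wired into the interface.
def chat_with_template(user_input):
    result = llm.create_chat_completion(
        messages=[{"role": "user", "content": user_input}],
        temperature=0.7,
        top_p=0.9,
        max_tokens=100,
    )
    return result['choices'][0]['message']['content'].strip()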

# Create the Gradio interface
iface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Qwen Chatbot",
    description="Chat with the Qwen2.5-0.5B-Instruct-GGUF model"
)

iface.launch()