import gradio as gr
from llama_cpp import Llama

# Initialize the model
MODEL_PATH = "model/qwen2.5-0.5b-instruct-q5_k_m.gguf"
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_gpu_layers=0,  # CPU-only; raise this to offload layers to the GPU
)

def chat(user_input):
    prompt = f"User: {user_input}\nAssistant: "
    output = llm(
        prompt=prompt,
        temperature=0.7,
        top_p=0.9,
        max_tokens=100,
        stop=["User:"],  # keep the model from writing the next user turn itself
    )
    response_text = output["choices"][0]["text"].strip()
    return response_text

# Create the Gradio interface
iface = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
    title="Qwen Chatbot",
    description="Chat with the Qwen2.5-0.5B-Instruct-GGUF model",
)

iface.launch()
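
# ----------------------------------------------------------------------
# Optional sketch (not part of the original script): Qwen2.5-Instruct
# GGUF files typically embed a ChatML chat template in their metadata,
# and llama-cpp-python can apply it via create_chat_completion() instead
# of the hand-written "User:/Assistant:" prompt used in chat() above.
# The name chat_via_template is introduced here for illustration; to try
# it, uncomment the function and pass fn=chat_via_template to
# gr.Interface.
#
# def chat_via_template(user_input):
#     output = llm.create_chat_completion(
#         messages=[{"role": "user", "content": user_input}],
#         temperature=0.7,
#         top_p=0.9,
#         max_tokens=100,
#     )
#     # Chat completions return the reply under choices[0]["message"]
#     return output["choices"][0]["message"]["content"].strip()
# ----------------------------------------------------------------------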