"""Gradio chat demo serving a GGUF medical model via llama.cpp.

Downloads a quantized GGUF checkpoint from the Hugging Face Hub, loads it
with the llama-cpp-python bindings, and exposes a simple text-in/text-out
chat UI through Gradio.
"""

import os

from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import gradio as gr

# Local cache directory for downloaded model weights.
MODEL_DIR = "/home/user/models"

# Ensure the model storage path exists before downloading.
os.makedirs(MODEL_DIR, exist_ok=True)

# Download the GGUF model (hf_hub_download caches, so reruns are cheap).
model_path = hf_hub_download(
    repo_id="YLX1965/medical-model",
    filename="unsloth.Q8_0.gguf",
    cache_dir=MODEL_DIR,
)

# Load the GGUF model with the llama.cpp bindings.
llm = Llama(model_path=model_path)


def chat(prompt):
    """Generate a completion for *prompt* and return its text.

    Args:
        prompt: Raw user text passed directly to the model (no chat
            template is applied).

    Returns:
        The generated completion string (capped at 200 tokens).
    """
    output = llm(prompt, max_tokens=200)
    return output["choices"][0]["text"]


# Launch the Gradio UI only when executed as a script, so importing this
# module (e.g. to reuse `chat`) does not start a web server as a side effect.
if __name__ == "__main__":
    interface = gr.Interface(fn=chat, inputs="text", outputs="text")
    interface.launch()