import gradio as gr
from llama_cpp import Llama

# Load the model directly from Hugging Face (avoids storage problems).
# The model is loaded once at import time and shared by every chat() call.
llm = Llama.from_pretrained(
    repo_id="YLX1965/medical-model",
    filename="unsloth.Q8_0.gguf",
)


def chat(prompt: str) -> str:
    """Return the model's completion for *prompt*.

    The prompt is passed to the module-level ``llm`` with generation capped
    at 200 tokens, and the text of the first returned choice is handed back.

    Args:
        prompt: Raw text entered by the user.

    Returns:
        The generated text, or an empty string when *prompt* is empty or
        contains only whitespace (the model is not invoked in that case).
    """
    # Nothing to complete: skip the inference call for blank input.
    if not prompt or not prompt.strip():
        return ""

    output = llm(prompt, max_tokens=200)
    return output["choices"][0]["text"]


# Run Gradio: expose chat() through a single text-in / text-out web UI.
# `gr` is imported at the top of the file together with the other imports.
interface = gr.Interface(fn=chat, inputs="text", outputs="text")
interface.launch()