import gradio as gr
import os
import requests
from llama_cpp import Llama

# Where the downloaded GGUF model file is stored locally
model_dir = "/home/user/models"
os.makedirs(model_dir, exist_ok=True)

model_name = "unsloth.Q8_0.gguf"
model_path = os.path.join(model_dir, model_name)

# Hugging Face URL for the quantized model weights
hf_model_url = "https://huggingface.co/YLX1965/medical-model/resolve/main/unsloth.Q8_0.gguf"

# Download the model on first run; skip if a cached copy already exists
if not os.path.exists(model_path):
    print(f"Downloading model from {hf_model_url}...")
    response = requests.get(hf_model_url, stream=True)
    response.raise_for_status()  # fail fast instead of writing an error page to disk
    with open(model_path, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:  # skip keep-alive chunks
                f.write(chunk)
    print("Download complete.")

# Load the GGUF model with llama-cpp-python (CPU inference with default settings)
llm = Llama(model_path=model_path)
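
# Optional tuning (assumptions, not in the original script): the Llama constructor
# also accepts parameters such as n_ctx (context window) and n_threads, e.g.:
#
#   llm = Llama(model_path=model_path, n_ctx=2048, n_threads=os.cpu_count())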

def chat(prompt):
    # Generate a completion for the raw prompt and return only the generated text
    output = llm(prompt, max_tokens=200)
    return output["choices"][0]["text"]
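
# Possible variant (assumption): the call also accepts sampling controls such as
# temperature and stop sequences:
#
#   output = llm(prompt, max_tokens=200, temperature=0.7, stop=["\n\n"])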

# Minimal text-in/text-out web UI
interface = gr.Interface(fn=chat, inputs="text", outputs="text",
                         title="Medical Chatbot",
                         description="Medical text generation with a quantized GGUF model")

interface.launch()
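
# Deployment note (assumption): when running inside a container such as a
# Hugging Face Space, bind to all interfaces so the UI is reachable:
#
#   interface.launch(server_name="0.0.0.0", server_port=7860)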