Spaces:
Running
on
Zero
Running
on
Zero
File size: 6,226 Bytes
bbe7b0a f3b7005 d4c9a92 6c69482 0464b4c d71ad7e bbe7b0a 86ba4d3 ea3aa47 74f56e5 df10446 74f56e5 bbe7b0a af7806f bbe7b0a ea3aa47 bbe7b0a 5cb869c 74f56e5 bbe7b0a abea35b 4ed0b9b 5cb869c d71ad7e 4ed0b9b abea35b 314eed8 4ed0b9b bbe7b0a 4ed0b9b 314eed8 bbe7b0a 0db1a0f bbe7b0a 8560f66 6e999ef 4ed0b9b bbe7b0a 4ed0b9b 314eed8 4ed0b9b bbe7b0a 4ed0b9b ea3aa47 bbe7b0a 4ed0b9b bbe7b0a 4ed0b9b bbe7b0a 4ed0b9b bbe7b0a 4ed0b9b bbe7b0a 4ed0b9b bbe7b0a 4ed0b9b bbe7b0a d71ad7e 746a084 d71ad7e 74f56e5 0db1a0f 4ed0b9b 7161b69 b1d449b 0db1a0f ad01e97 0db1a0f bbe7b0a 0db1a0f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
import os
from threading import Thread
from typing import Iterator
import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# Upper bound offered by the "Max new tokens" slider.
MAX_MAX_NEW_TOKENS = 2048
# Initial position of the "Max new tokens" slider.
DEFAULT_MAX_NEW_TOKENS = 1024
# Longest prompt (in tokens) fed to the model; overridable through the
# MAX_INPUT_TOKEN_LENGTH environment variable.
MAX_INPUT_TOKEN_LENGTH = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "4096"))
# Markdown shown at the top of the page: bilingual (Chinese/English) project
# introduction followed by a not-for-medical-use disclaimer.
DESCRIPTION = """\
# FulPhil-仲景中医大语言模型-ZhongJingGPT-V2-1_8b-First LLM in TCM
致敬仲景先师,融汇古典与智能。本模型由医哲未来(FulPhil)研发,是专注于中医药领域的强大语言模型。它能够运用传统中医理论,结合现代人工智能技术,为中医研究和应用提供卓越助力。欢迎关注我们的 [GitHub主页](https://github.com/pariskang/CMLM-ZhongJing) 及模型 [ZhongJing-2-1_8b](https://huggingface.co/CMLM/ZhongJing-2-1_8b) 下载体验!
Paying tribute to the ancient master Zhang Zhongjing, this model integrates classical knowledge with modern intelligence. Developed by FulPhil (Future Medicine Philosophy), it is a powerful language model focused on the field of Traditional Chinese Medicine (TCM). It employs traditional TCM theories combined with contemporary artificial intelligence technology to provide excellent support for TCM research and applications. Welcome to visit our [GitHub homepage](https://github.com/pariskang/CMLM-ZhongJing) and download the model [ZhongJing-2-1_8b-merge](https://huggingface.co/CMLM/ZhongJing-2-1_8b) for a trial experience!
请注意!!!本模型不得用于任何医疗或潜在具有医疗或康养建议的任何场景,目前仍为科研测试阶段,敬请帮我们提出宝贵意见,谢谢。
Please note!!! This model should not be used for any medical purposes or scenarios potentially involving medical or health advice. It is currently still in the research and testing stage. We sincerely request your valuable feedback. Thank you.
"""
# Markdown footer rendered below the chat: license and acceptable-use links.
# NOTE(review): the link text names "ZhongJing-3-1_5b" while every URL points
# at the "ZhongJing-2-1_8b" repository — confirm which model this should cite.
LICENSE = """
<p/>
---
As a derivate work of [ZhongJing-3-1_5b](https://huggingface.co/CMLM/ZhongJing-2-1_8b) by FulPhil,
this demo is governed by the original [license](https://huggingface.co/CMLM/ZhongJing-2-1_8b/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/CMLM/ZhongJing-2-1_8b/blob/main/USE_POLICY.md).
"""
# The checkpoint is only loaded when CUDA is present; on a CPU-only host the
# page description gets a warning banner instead and `model` / `tokenizer`
# are left undefined.
if torch.cuda.is_available():
    model_id = "CMLL/ZhongJing-3-1_5b_V2"
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype="auto",
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.use_default_system_prompt = False
else:
    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
# Default system prompt: used as the default of generate()'s `system_prompt`
# parameter and as the initial value of the "System prompt" textbox.
system_prompt = "You are a helpful TCM assistant named 仲景中医大语言模型, created by 医哲未来. You can switch between Chinese and English based on user preference."
@spaces.GPU
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_prompt: str = system_prompt,
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.95,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    """Stream the model's reply to ``message``.

    Args:
        message: The newest user turn.
        chat_history: Earlier ``(user, assistant)`` pairs, oldest first.
        system_prompt: Content of the leading system turn.
        max_new_tokens: Forwarded to ``model.generate``.
        temperature: Sampling temperature, forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold, forwarded to ``model.generate``.
        top_k: Top-k sampling cutoff, forwarded to ``model.generate``.
        repetition_penalty: Forwarded to ``model.generate``.

    Yields:
        The reply accumulated so far, re-joined after every streamed chunk.
    """
    conversation = [{"role": "system", "content": system_prompt}]
    for user, assistant in chat_history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    # add_generation_prompt=True makes the chat template end with the
    # assistant-turn header, so the model writes a reply instead of having to
    # open the assistant turn itself (or continuing the user turn).
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
    )
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        # Keep only the most recent tokens so the newest turn survives.
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        num_beams=1,
        repetition_penalty=repetition_penalty,
    )
    # model.generate runs in a worker thread so this generator can consume
    # the streamer while generation is still in progress.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)
# Chat UI wired to `generate`. The extra widgets are listed in the same order
# as generate's parameters after (message, chat_history).
chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        # system_prompt
        gr.Textbox(label="System prompt", lines=6, value=system_prompt),
        # max_new_tokens
        gr.Slider(label="Max new tokens", minimum=1, maximum=MAX_MAX_NEW_TOKENS, step=1, value=DEFAULT_MAX_NEW_TOKENS),
        # temperature
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.6),
        # top_p
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.05, maximum=1.0, step=0.05, value=0.9),
        # top_k
        gr.Slider(label="Top-k", minimum=1, maximum=1000, step=1, value=50),
        # repetition_penalty
        gr.Slider(label="Repetition penalty", minimum=1.0, maximum=2.0, step=0.05, value=1.2),
    ],
    stop_btn=None,
    # One-click sample questions; each inner list holds just the message.
    examples=[
        ["你能简要解释一下什么是中医吗?"],
        ["简述《伤寒杂病论》的主要内容。"],
        ["中医如何治疗失眠?"],
        ["我发热,咳嗽,咽痛,舌苔黄腻,脉滑数,请给出中医诊断及处方?"],
        ["写一篇关于‘AI在中医研究中的应用’的100字文章。"],
        ["写一篇从中医角度关于‘秋季女性健康调养方案‘的1000字科普文章,从季节变化、饮食调理、活动养生等方面进行阐述"],
    ],
)
# Page layout, top to bottom: description, duplicate button, chat, license.
demo = gr.Blocks(css="style.css")
with demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(
        value="Duplicate Space for private use",
        elem_id="duplicate-button",
    )
    chat_interface.render()
    gr.Markdown(LICENSE)

if __name__ == "__main__":
    # Enable the request queue (max_size=20) before starting the server.
    demo.queue(max_size=20).launch()
|