Spaces:
Runtime error
Runtime error
File size: 4,056 Bytes
b1d449b 0464b4c abea35b ff9b690 0464b4c ff9b690 0464b4c abea35b ff9b690 b1d449b ff9b690 0464b4c 1a6d301 0464b4c da06a28 b1d449b 0464b4c ff9b690 b1d449b ff9b690 b1d449b 1a6d301 b1d449b da06a28 b1d449b da06a28 b1d449b 0464b4c ff9b690 b1d449b 0464b4c b1d449b 0464b4c abea35b ff9b690 b1d449b da06a28 ff9b690 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr
# Model setup: load the base chat model, load the adapter identified by
# peft_model_id on top of it, and build the tokenizer from the same repo id.
base_model_id = "Qwen/Qwen1.5-1.8B-Chat"
peft_model_id = "CMLM/ZhongJing-2-1_8b"

model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
model.load_adapter(peft_model_id)

# NOTE(review): pad_token is an empty string here. It is kept exactly as
# found; confirm this is intended and not a value that was lost upstream.
tokenizer = AutoTokenizer.from_pretrained(
    peft_model_id,
    pad_token='',
    padding_side="right",
    trust_remote_code=True,
)
# Single turn chat
@spaces.GPU
def single_turn_chat(question):
    """Answer one standalone question; no conversation history is kept.

    The question is prefixed with ``"Question: "``, wrapped in a chat template
    together with a fixed system prompt, and sent to the module-level model.

    Args:
        question: The user's question text.

    Returns:
        The decoded reply text, with the prompt tokens and special tokens
        stripped.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    prompt = f"Question: {question}"
    messages = [
        {"role": "system", "content": "You are a helpful TCM assistant named 仲景中医大语言模型, created by 医哲未来. You can switch between Chinese and English based on user preference."},
        {"role": "user", "content": prompt},
    ]
    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([input_text], return_tensors="pt").to(device)

    # Pass the attention mask along with the ids so generate() does not have
    # to guess it from the pad token.
    generated_ids = model.generate(
        input_ids=model_inputs.input_ids,
        attention_mask=model_inputs.attention_mask,
        max_new_tokens=512,
    )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = model_inputs.input_ids.shape[-1]
    response = tokenizer.decode(generated_ids[0][prompt_length:], skip_special_tokens=True)
    return response
# Multi-turn chat
@spaces.GPU
def multi_turn_chat(question, chat_history=None):
    """Run one turn of a multi-turn conversation.

    Args:
        question: The user's new message. Must be a string.
        chat_history: List of ``{"role": ..., "content": ...}`` messages from
            earlier turns, or ``None``/empty to start a new conversation with
            the default system prompt. The list passed in is not modified.

    Returns:
        A ``(formatted_history, chat_history)`` pair: ``formatted_history`` is
        a list of ``(user_text, assistant_text)`` tuples for display, and
        ``chat_history`` is a new message list including this turn's user
        message and assistant reply.

    Raises:
        ValueError: If ``question`` is not a string.
        RuntimeError: If generation or decoding fails; the original exception
            is attached as the cause.
    """
    # Validate before touching the model so bad input fails fast.
    if not isinstance(question, str):
        raise ValueError("The question must be a string.")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Work on a copy: if generation fails below, the caller's history must not
    # be left with a dangling user message that has no assistant reply.
    if chat_history:
        history = list(chat_history)
    else:
        history = [{"role": "system", "content": "You are a helpful TCM assistant named 仲景中医大语言模型, created by 医哲未来. You can switch between Chinese and English based on user preference."}]
    history.append({"role": "user", "content": question})

    # Apply the chat template and prepare the input
    inputs = tokenizer.apply_chat_template(history, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([inputs], return_tensors="pt").to(device)

    try:
        # Pass the attention mask along with the ids so generate() does not
        # have to guess it from the pad token.
        outputs = model.generate(
            input_ids=model_inputs.input_ids,
            attention_mask=model_inputs.attention_mask,
            max_new_tokens=512,
        )
        # generate() returns prompt + continuation; keep only the continuation.
        generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:]
        response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    except Exception as e:
        raise RuntimeError("Error in model generation: " + str(e)) from e

    # Append the assistant's response to the chat history
    history.append({"role": "assistant", "content": response})

    # Pair each assistant reply with the most recent user message for display.
    formatted_history = []
    last_user_message = ""
    for entry in history:
        if entry['role'] == 'user':
            last_user_message = entry['content']
        elif entry['role'] == 'assistant':
            formatted_history.append((last_user_message, entry['content']))
    return formatted_history, history
def clear_history():
    """Return a pair of new, independent empty lists.

    NOTE(review): presumably intended as a reset handler whose two outputs
    are the chatbot display and the conversation state; confirm against the
    UI wiring.
    """
    return [], []
# Single-turn UI: one text box in, one text box out, backed by single_turn_chat.
single_turn_interface = gr.Interface(
    title="仲景GPT-V2-1.8B 单轮对话",
    description="博极医源,精勤不倦。Unlocking the Wisdom of Traditional Chinese Medicine with AI.",
    fn=single_turn_chat,
    inputs=["text"],
    outputs="text",
)
# Multi-turn UI: a chatbot display plus per-session state holding the raw
# message list that multi_turn_chat consumes and returns.
with gr.Blocks() as multi_turn_interface:
    chatbot = gr.Chatbot(label="仲景GPT-V2-1.8B 多轮对话")
    state = gr.State([])
    with gr.Row():
        with gr.Column(scale=6):
            user_input = gr.Textbox(label="输入", placeholder="输入你的问题")
        with gr.Column(scale=6):
            submit_button = gr.Button("发送")

    # The button click and pressing Enter in the textbox trigger the same
    # handler with the same inputs and outputs.
    chat_inputs = [user_input, state]
    chat_outputs = [chatbot, state]
    submit_button.click(multi_turn_chat, chat_inputs, chat_outputs)
    user_input.submit(multi_turn_chat, chat_inputs, chat_outputs)
    # NOTE(review): clear_history() is defined in this file but no control is
    # wired to it here; confirm whether a reset button was intended.
# launch() blocks the main thread when run as a script, so calling it once per
# interface would serve only the first one. Serve both UIs from a single app,
# one tab each.
demo = gr.TabbedInterface(
    [single_turn_interface, multi_turn_interface],
    tab_names=["单轮对话", "多轮对话"],
)
demo.launch()
|