import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr

# Initialize the base model, PEFT adapter, and tokenizer
peft_model_id = "CMLM/ZhongJing-2-1_8b"
base_model_id = "Qwen/Qwen1.5-1.8B-Chat"
model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
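# Attach the ZhongJing PEFT adapter weights on top of the base model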
model.load_adapter(peft_model_id)
tokenizer = AutoTokenizer.from_pretrained(
    "CMLM/ZhongJing-2-1_8b",
    padding_side="right",
    trust_remote_code=True,
    pad_token='<|endoftext|>'  # Qwen1.5's end-of-text token doubles as the pad token
)

# Single-turn chat: answer one standalone question without history
@spaces.GPU
def single_turn_chat(question):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
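    # Move the model onto the GPU made available inside this @spaces.GPU call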
    model.to(device)
    
    prompt = f"Question: {question}"
    messages = [
        {"role": "system", "content": "You are a helpful TCM assistant named 仲景中医大语言模型, created by 医哲未来. You can switch between Chinese and English based on user preference."},
        {"role": "user", "content": prompt}
    ]
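    # Render the messages with the tokenizer's chat template, leaving the assistant turn open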
    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
    generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
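    # Drop the prompt tokens so only the newly generated answer is decoded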
    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response

# Multi-turn chat
@spaces.GPU
def multi_turn_chat(question, chat_history=None):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    
    if not isinstance(question, str):
        raise ValueError("The question must be a string.")
    
    if chat_history is None or chat_history == []:
        chat_history = [{"role": "system", "content": "You are a helpful TCM assistant named 仲景中医大语言模型, created by 医哲未来. You can switch between Chinese and English based on user preference."}]
    
    chat_history.append({"role": "user", "content": question})
    
    # Apply the chat template and prepare the input
    input_text = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([input_text], return_tensors="pt").to(device)
    
    try:
        # Generate the response from the model
        outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
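        # Keep only the tokens generated after the prompt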
        generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:]
        response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    except Exception as e:
        raise RuntimeError(f"Error in model generation: {e}") from e
    
    # Append the assistant's response to the chat history
    chat_history.append({"role": "assistant", "content": response})
    
    # Flatten the message list into (user, assistant) pairs for gr.Chatbot
    formatted_history = []
    tempuser = ""
    for entry in chat_history:
        if entry['role'] == 'user':
            tempuser = entry['content']
        elif entry['role'] == 'assistant':
            formatted_history.append((tempuser, entry['content']))
    
    return formatted_history, chat_history

def clear_history():
    return [], []

# Single turn interface
single_turn_interface = gr.Interface(
    fn=single_turn_chat,
    inputs=["text"],
    outputs="text",
    title="仲景GPT-V2-1.8B 单轮对话",
    description="博极医源,精勤不倦。Unlocking the Wisdom of Traditional Chinese Medicine with AI."
)

# Multi-turn interface
with gr.Blocks() as multi_turn_interface:
    chatbot = gr.Chatbot(label="仲景GPT-V2-1.8B 多轮对话")
    state = gr.State([])
    with gr.Row():
        with gr.Column(scale=6):
            user_input = gr.Textbox(label="输入", placeholder="输入你的问题")
        with gr.Column(scale=6):
            submit_button = gr.Button("发送")
            clear_button = gr.Button("清空")  # reset the conversation
    
    submit_button.click(multi_turn_chat, [user_input, state], [chatbot, state])
    user_input.submit(multi_turn_chat, [user_input, state], [chatbot, state])
    clear_button.click(clear_history, None, [chatbot, state])

# Interface.launch() blocks, so launching both apps sequentially would never
# start the second one; serve them together as tabs instead.
demo = gr.TabbedInterface(
    [single_turn_interface, multi_turn_interface],
    ["单轮对话", "多轮对话"],
)
demo.launch()