from transformers import AutoTokenizer, AutoModel import gradio as gr import time tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True) model = AutoModel.from_pretrained("THUDM/chatglm-6b-int4", trust_remote_code=True).float() glm_history = [] def reset_glm(): global glm_history settings = "请记住以下设定:你后续跟我的聊天过程中不可以出现“ChatGLM”这个单词,如果你被要求自我介绍,就说自己是基于AI的聊天模型" response, history = model.chat(tokenizer, settings, history =[]) glm_history = history def chat_with_glm(input): print("询问: ", input) global glm_history response, history = model.chat(tokenizer, input, history = glm_history) glm_history = history print("回答: ", response) print('----------------------') return response def on_submit(input, history): global glm_history respond = "[" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "]\n\n" if input == '--history': if len(history) > 0: history_str = "" for h in history: if len(history_str) > 0: history_str += "\n" history_str += '\n'.join(h) history.append((input, respond + history_str)) elif input == '--clear': history = [] reset_glm() else: history.append((input, respond + chat_with_glm(input))) return "", history with gr.Blocks() as demo: gr.Markdown("