import os
# install runtime dependencies (transformers, SentencePiece, torch, streamlit-chat)
os.system('pip install transformers SentencePiece')
os.system('pip install torch')
os.system('pip install streamlit-chat')
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import streamlit as st
from streamlit_chat import message
# download the model and tokenizer
tokenizer = T5Tokenizer.from_pretrained("ClueAI/ChatYuan-large-v1")
model = T5ForConditionalGeneration.from_pretrained("ClueAI/ChatYuan-large-v1")
# switch the Colab notebook runtime to GPU for faster inference; CPU is used here
device = torch.device('cpu')
model.to(device)
print('Model loaded!')
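# preprocess/postprocess escape and unescape newlines and tabs, so that each
# dialogue turn is passed to and returned from the model as a single line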
def preprocess(text):
    text = text.replace("\n", "\\n").replace("\t", "\\t")
    return text

def postprocess(text):
    return text.replace("\\n", "\n").replace("\\t", "\t")
def answer(history, sample=True, top_p=1, temperature=0.7):
    '''sample: whether to sample; for generation tasks this can be set to True.
    top_p: between 0 and 1; larger values give more diverse output.
    max_new_tokens=512 lost...'''
    # escape every turn of the dialogue history before encoding
    preprocess_history = []
    for i in range(len(history)):
        preprocess_history.append(preprocess(history[i]))
    encoding = tokenizer(text=preprocess_history, truncation=True, padding=True, max_length=768, return_tensors="pt").to(device)
    if not sample:
        out = model.generate(**encoding, return_dict_in_generate=True, output_scores=False, max_new_tokens=512, num_beams=1, length_penalty=0.6)
    else:
        out = model.generate(**encoding, return_dict_in_generate=True, output_scores=False, max_new_tokens=512, do_sample=True, top_p=top_p, temperature=temperature, no_repeat_ngram_size=3)
    out_text = tokenizer.batch_decode(out["sequences"], skip_special_tokens=True)
    print('小元: ' + postprocess(out_text[0]))
    return postprocess(out_text[0])
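# --- Streamlit chat UI ---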
st.set_page_config(
    page_title="Chinese ChatBot - Demo",
    page_icon=":robot:"
)
st.header("Chinese ChatBot - Demo")
st.markdown("[Github](https://github.com/scutcyr)")
if 'generated' not in st.session_state:
    st.session_state['generated'] = []
if 'past' not in st.session_state:
    st.session_state['past'] = []
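# NOTE: query() is never called below. It follows a PaddleNLP-style dialogue API
# (tokenizer.dialogue_encode, decode_strategy, select_response) that the T5 tokenizer
# and model loaded above do not provide; it is kept here only as leftover code.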
def query(history):
    inputs = tokenizer.dialogue_encode(
        history, add_start_token_as_response=True, return_tensors=True, is_split_into_words=False
    )
    inputs["input_ids"] = inputs["input_ids"].astype("int64")
    ids, scores = model.generate(
        input_ids=inputs["input_ids"],
        token_type_ids=inputs["token_type_ids"],
        position_ids=inputs["position_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=64,
        min_length=1,
        decode_strategy="sampling",
        temperature=1.0,
        top_k=5,
        top_p=1.0,
        num_beams=0,
        length_penalty=1.0,
        early_stopping=False,
        num_return_sequences=20,
    )
    max_dec_len = 64
    num_return_sequences = 20
    bot_response = select_response(
        ids, scores, tokenizer, max_dec_len, num_return_sequences, keep_space=False
    )[0]
    return bot_response
def get_text():
    input_text = st.text_input("用户: ", "你好!", key="input")
    return input_text
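# history is rebuilt on every Streamlit rerun, so only the current user turn is
# sent to the model; past turns are kept in st.session_state for display only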
history = []
user_input = get_text()
history.append(user_input)
if user_input:
    output = answer(history)
    st.session_state.past.append(user_input)
    st.session_state.generated.append(output)
    history.append(output)
if st.session_state['generated']:
    for i in range(len(st.session_state['generated'])-1, -1, -1):
        message(st.session_state["generated"][i], key=str(i))
        message(st.session_state['past'][i], is_user=True, key=str(i) + '_user')