# plsum_autowiki / app.py
# Author: seidel — initial commit (3ad0459)
from venv import create
import streamlit as st
from transformers import T5TokenizerFast, T5ForConditionalGeneration
from tfidf import tfidf, filter_paragraph
def remove_doc(i):
    """Remove the i-th reference document from session state.

    Silently does nothing when 'docs' is absent or the index is out of range
    (the button callback may fire after the list has already shrunk).
    """
    docs = st.session_state.get('docs')
    if docs is not None and i < len(docs):
        docs.pop(i)
def split_sentences(paragraph):
    """Split *paragraph* into sentences, using ' . ' as the delimiter."""
    return paragraph.split(' . ')
# Page header and one-time loading of the tokenizer/model into session state.
st.markdown('## Use o PLSUM para criar leads do Wikipedia automaticamente')
# Fix: "a baixo" is a typo for "abaixo" ("below") in the user-facing text.
st.markdown('''
Crie resumos no estilo do wikipedia a partir de multiplos documentos.
Cole textos de referência no formulário abaixo e depois clique em "Gerar resumo".
''')
# Heavy objects are cached in st.session_state so they load only once per
# session, not on every Streamlit rerun.
if 'tokenizer' not in st.session_state:
    with st.sidebar:
        st.info('Carregando o tokenizador')
        st.session_state['tokenizer'] = T5TokenizerFast.from_pretrained("seidel/plsum-base-ptt5")
if 'model' not in st.session_state:
    with st.sidebar:
        st.info('Carregando o modelo')
        st.session_state['model'] = T5ForConditionalGeneration.from_pretrained("seidel/plsum-base-ptt5", use_cache=True)
# Reference documents pasted by the user accumulate here.
st.session_state.setdefault('docs', [])
# Form for pasting a new reference document; cleared after each submission.
with st.form("my_form", clear_on_submit=True):
    new_doc = st.text_area('Cole um documento de referência aqui')
    # Every Streamlit form requires a submit button.
    if st.form_submit_button("Adicionar texto"):
        if new_doc:
            # Store the cleaned-up paragraph, not the raw paste.
            st.session_state['docs'].append(filter_paragraph(new_doc))
            st.info('Documento adicionado')
        else:
            st.error('Adicione algum texto')
# Sidebar: one collapsible preview per stored document, plus a remove button.
for idx, document in enumerate(st.session_state['docs']):
    with st.sidebar:
        preview_col, remove_col = st.columns([8, 1])
        with preview_col:
            with st.expander('Documento {}'.format(idx + 1)):
                st.caption(document)
        with remove_col:
            # on_click callback runs before the rerun, so the list shrinks
            # before the loop above is re-evaluated.
            st.button('X', key='remove_{}'.format(idx), on_click=remove_doc, args=(idx,))
query = st.text_input('Título do resumo')
create_summary = st.button('Criar resumo')
if create_summary:
    if query:
        if st.session_state['docs']:
            with st.sidebar:
                st.info('Criando resumo')
            # Gather every sentence from all reference documents.
            sentences = []
            for doc in st.session_state['docs']:
                sentences.extend(split_sentences(doc))
            # Keep only the most relevant sentences for the query context.
            filtered_sentences = tfidf(sentences, n_documents=7)
            # BUG FIX: the original joined the unfiltered `sentences`, making
            # the tfidf selection dead code — the 512-token truncation then
            # dropped content arbitrarily instead of keeping the best sentences.
            input_text = 'summarize: {} </s> {}'.format(query.lower(), '</s>'.join(filtered_sentences))
            x = st.session_state['tokenizer'](input_text, padding="max_length", max_length=512, return_tensors="pt", truncation=True)
            y = st.session_state['model'].generate(**x)
            # batch_decode returns a list; a single input yields one summary.
            summary = st.session_state['tokenizer'].batch_decode(y, skip_special_tokens=True)[0]
            st.markdown('#### {}'.format(query))
            st.markdown('{}'.format(summary))
        else:
            with st.sidebar:
                st.error('Adicione documentos de referência')
    else:
        with st.sidebar:
            st.error('Adicione título para o resumo')