# PLSUM demo app: build Wikipedia-style lead summaries from multiple
# user-pasted reference documents using the seidel/plsum-base-ptt5 model.
# NOTE(review): `from venv import create` is unused and looks like an editor
# autocomplete accident — confirm with the team and delete.
from venv import create

import streamlit as st
from transformers import T5TokenizerFast, T5ForConditionalGeneration

from tfidf import tfidf, filter_paragraph


def remove_doc(i):
    """Remove the i-th reference document from the session state, if it exists.

    Used as an ``on_click`` callback for the per-document 'X' buttons, so it
    must tolerate stale indices from a previous re-run.
    """
    if 'docs' in st.session_state and len(st.session_state['docs']) > i:
        st.session_state['docs'].pop(i)


def split_sentences(paragraph):
    """Split a pre-tokenized paragraph into sentences on the ' . ' delimiter.

    Assumes `filter_paragraph` emits text with ' . ' between sentences —
    TODO confirm against tfidf.filter_paragraph.
    """
    return paragraph.split(' . ')


st.markdown('## Use o PLSUM para criar leads do Wikipedia automaticamente')
st.markdown(''' Crie resumos no estilo do wikipedia a partir de multiplos documentos. Cole textos de referência no formulário a baixo e depois clique em "Gerar resumo". ''')

# Load the tokenizer and model once and cache them in the session state so
# they survive Streamlit's top-to-bottom script re-runs.
if 'tokenizer' not in st.session_state:
    with st.sidebar:
        st.info('Carregando o tokenizador')
    st.session_state['tokenizer'] = T5TokenizerFast.from_pretrained("seidel/plsum-base-ptt5")

if 'model' not in st.session_state:
    with st.sidebar:
        st.info('Carregando o modelo')
    st.session_state['model'] = T5ForConditionalGeneration.from_pretrained("seidel/plsum-base-ptt5", use_cache=True)

if 'docs' not in st.session_state:
    st.session_state['docs'] = []

# Form for adding one reference document; cleared after each submit.
with st.form("my_form", clear_on_submit=True):
    new_doc = st.text_area('Cole um documento de referência aqui')
    # Every form must have a submit button.
    submitted = st.form_submit_button("Adicionar texto")
    if submitted:
        if new_doc:  # rejects both None and the empty string
            st.session_state['docs'].append(filter_paragraph(new_doc))
            st.info('Documento adicionado')
        else:
            st.error('Adicione algum texto')

# Sidebar: one collapsible preview per stored document, plus a delete button.
for i, doc in enumerate(st.session_state['docs']):
    with st.sidebar:
        col1, col2 = st.columns([8, 1])
        with col1:
            with st.expander('Documento {}'.format(i + 1)):
                st.caption(doc)
        with col2:
            st.button('X', key='remove_{}'.format(i), on_click=remove_doc, args=(i,))

query = st.text_input('Título do resumo')
create_summary = st.button('Criar resumo')

if create_summary:
    if query:
        if st.session_state['docs']:
            with st.sidebar:
                st.info('Criando resumo')
            # Gather every sentence from every reference document.
            sentences = []
            for doc in st.session_state['docs']:
                sentences.extend(split_sentences(doc))
            # Keep only the most relevant sentences before hitting the
            # model's 512-token input limit.
            filtered_sentences = tfidf(sentences, n_documents=7)
            # BUG FIX: the tf-idf selection above was computed but ignored
            # (the raw `sentences` list was used), and ''.join glued the
            # sentences together with no separator at all. Use the filtered
            # set, space-separated.
            input_text = 'summarize: {} {}'.format(query.lower(), ' '.join(filtered_sentences))
            x = st.session_state['tokenizer'](input_text, padding="max_length", max_length=512, return_tensors="pt", truncation=True)
            y = st.session_state['model'].generate(**x)
            summary = st.session_state['tokenizer'].batch_decode(y, skip_special_tokens=True)[0]
            st.markdown('#### {}'.format(query))
            st.markdown('{}'.format(summary))
        else:
            with st.sidebar:
                st.error('Adicione documentos de referência')
    else:
        with st.sidebar:
            st.error('Adicione título para o resumo')