File size: 1,447 Bytes
3d08ef2 cb2a723 3d08ef2 94c02b8 3d08ef2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import nltk
import streamlit as st
# Sentence-tokenizer data required by nltk.tokenize.sent_tokenize.
# NOTE(review): older NLTK releases read the pickled "punkt" package, while
# recent ones (3.9+, to be confirmed against the pinned version) look up
# "punkt_tab" instead. Fetching both keeps the app working on either.
nltk.download('punkt')
nltk.download('punkt_tab')
def fragment_text(text, tokenizer):
    """Split *text* into sentence-aligned chunks sized for the model input.

    Sentences are detected with ``nltk.tokenize.sent_tokenize`` and packed
    greedily, in their original order, into chunks. A chunk is closed as soon
    as adding the next sentence would push its token count above
    ``tokenizer.max_len_single_sentence``.

    Args:
        text: Raw text to split.
        tokenizer: Object exposing ``max_len_single_sentence`` and
            ``tokenize(str)`` (e.g. a Hugging Face tokenizer).

    Returns:
        A list of chunk strings, each made of whole sentences joined by a
        single space. The list is empty when no sentence is found. A single
        sentence that is longer than the limit is never split; it ends up
        alone in its own chunk.
    """
    max_len = tokenizer.max_len_single_sentence

    chunks = []
    current_sentences = []  # sentences of the chunk being assembled
    current_tokens = 0      # estimated token count of that chunk

    for sentence in nltk.tokenize.sent_tokenize(text):
        sentence_tokens = len(tokenizer.tokenize(sentence))

        # Compare tokens with tokens: the budget is expressed in tokens, so
        # the running size must be too (not the chunk's character length).
        # The sum of per-sentence counts is used as the chunk size estimate.
        # Only flush a chunk that actually holds something, so an over-long
        # first sentence does not produce an empty chunk.
        if current_sentences and current_tokens + sentence_tokens > max_len:
            chunks.append(" ".join(current_sentences).strip())
            current_sentences = []
            current_tokens = 0

        current_sentences.append(sentence)
        current_tokens += sentence_tokens

    if current_sentences:
        chunks.append(" ".join(current_sentences).strip())

    return chunks
def summarize_text(text):
    """Summarize *text* chunk by chunk and join the partial summaries.

    The text is split with ``fragment_text``; every non-empty chunk is encoded
    with the module-level ``tokenizer``, passed to ``model.generate`` and
    decoded back to a string.

    Args:
        text: Text to summarize.

    Returns:
        The per-chunk summaries joined by single spaces, or ``""`` when there
        is nothing to summarize.
    """
    summaries = []
    for chunk in fragment_text(text, tokenizer):
        if not chunk:
            # An empty chunk has no content to summarize; skip the model call.
            continue

        # truncation=True asks the tokenizer to cap the encoded input at its
        # maximum length, so a chunk made of one very long sentence cannot
        # exceed what the model accepts.
        encoded = tokenizer(chunk, return_tensors='pt', truncation=True)
        generated = model.generate(**encoded)

        # One chunk is encoded per call, so the first row is the summary.
        summaries.append(tokenizer.decode(generated[0], skip_special_tokens=True))

    return " ".join(summaries)
# Import the summarization model classes
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint of the summarization model
checkpoint = "tclopess/bart_samsum"


def _load_summarizer(name):
    """Return the ``(tokenizer, model)`` pair loaded from checkpoint *name*."""
    return (
        AutoTokenizer.from_pretrained(name),
        AutoModelForSeq2SeqLM.from_pretrained(name),
    )


# Streamlit re-executes this script on every user interaction. Caching the
# loader keeps a single tokenizer/model pair alive instead of loading them
# again on each rerun. The guard keeps the script working on Streamlit
# releases that do not provide st.cache_resource (they just reload as before).
if hasattr(st, "cache_resource"):
    _load_summarizer = st.cache_resource(_load_summarizer)

tokenizer, model = _load_summarizer(checkpoint)
# Input area for the text and the button that triggers the summary
text_input = st.text_area("Cole ou digite o texto a ser resumido:")
button = st.button("Resumo")

# Run the summarization when the button is clicked
if button:
    if text_input.strip():
        summary = summarize_text(text_input)
        st.write("Resumo:")
        st.write(summary)
    else:
        # Nothing to summarize: tell the user instead of running the model
        # and showing an empty result.
        st.warning("Digite ou cole um texto antes de pedir o resumo.")
|