# Previous one-liner approach, kept for reference:
# import gradio as gr
# gr.load("models/tclopess/bart_samsum").launch()
import gradio as gr
import nltk
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# sent_tokenize needs the punkt sentence tokenizer data
nltk.download("punkt")

# Load the summarization model and its tokenizer
checkpoint = "tclopess/bart_samsum"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
# Split the text into sentence chunks that fit within the model's input limit
def fragment_text(text, tokenizer):
    sentences = nltk.tokenize.sent_tokenize(text)
    max_len = tokenizer.max_len_single_sentence
    chunks = []
    chunk = ""
    chunk_len = 0  # token count of the current chunk
    for sentence in sentences:
        sentence_len = len(tokenizer.tokenize(sentence))
        # Start a new chunk when adding this sentence would exceed the limit
        if chunk_len + sentence_len <= max_len:
            chunk += sentence + " "
            chunk_len += sentence_len
        else:
            chunks.append(chunk.strip())
            chunk = sentence + " "
            chunk_len = sentence_len
    if chunk != "":
        chunks.append(chunk.strip())
    return chunks
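
# Illustrative usage sketch (not executed here): a short text that fits within
# the token limit comes back as a single chunk, e.g.
#   fragment_text("Hello there. How are you?", tokenizer)
#   -> ["Hello there. How are you?"]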
# Summarize each chunk independently and collect the results
def generate_summaries(text):
    chunks = fragment_text(text, tokenizer)
    summaries = []
    for chunk in chunks:
        inputs = tokenizer(chunk, return_tensors="pt", truncation=True)
        output = model.generate(**inputs)
        summary = tokenizer.decode(output[0], skip_special_tokens=True)
        summaries.append(summary)
    return summaries
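
# Illustrative usage sketch (not executed here): each chunk gets its own summary,
# so a long dialogue yields a list with one summary string per chunk, e.g.
#   generate_summaries(long_dialogue_text)
#   -> ["summary of chunk 1", "summary of chunk 2", ...]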
# Join the chunk summaries into the final summary string shown in the UI
def display_summary(text):
    summaries = generate_summaries(text)
    return " ".join(summaries)

# Build the Gradio interface: a text box in, the final summary out
demo = gr.Interface(
    fn=display_summary,
    inputs=gr.Textbox(label="Enter or paste the text here:", lines=10),
    outputs=gr.Textbox(label="Final summary:"),
)

demo.launch()
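
# Local run sketch (assumes gradio, transformers, torch and nltk are installed):
#   python app.py
# launch() prints a local URL to open in the browser.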