# Spaces: Runtime error
import gradio as gr | |
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer | |
import torch | |
import spacy | |
import pytextrank | |
from sumy.parsers.plaintext import PlaintextParser | |
from sumy.nlp.tokenizers import Tokenizer | |
from sumy.summarizers.luhn import LuhnSummarizer | |
from sumy.summarizers.lex_rank import LexRankSummarizer | |
import nltk | |
# Sentence-tokenizer data fetched at startup (presumably what sumy's
# Tokenizer("portuguese") relies on -- TODO confirm).
nltk.download('punkt')

# Portuguese spaCy pipeline; the "textrank" component is expected to be
# registered by the pytextrank import at the top of the file.
nlp = spacy.load('pt_core_news_sm')
nlp.add_pipe("textrank")

# Seq2seq checkpoint used by summarize_mT5.
model_name = "GiordanoB/mT5_multilingual_XLSum-sumarizacao-PTBR"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Root Gradio container; the UI is declared inside it further down.
app = gr.Blocks()
def summarize_HUB_Multidocument(input_1, input_2, input_3, method, max_length, min_length, num_beams): | |
if(input_1 and not input_2 and not input_3 or not input_1 and input_2 and not input_3 or not input_1 and not input_2 and input_3): | |
return "Por favor utilize a aba de sumarização monodocumento" | |
if method == "Pure mT5": | |
if(input_1 and input_2 and input_3 ): #"3 cheios" | |
tempSum1 = summarize_mT5(input_1, max_length, min_length, num_beams) | |
tempSum2 = summarize_mT5(input_2, max_length, min_length, num_beams) | |
tempSum3 = summarize_mT5(input_3, max_length, min_length, num_beams) | |
fullSumm = tempSum1 + tempSum2 + tempSum3 | |
return summarize_mT5(fullSumm, max_length, min_length, num_beams) | |
if(input_1 and input_2 and not input_3): #"1 e 2 cheios" | |
tempSum1 = summarize_mT5(input_1, max_length, min_length, num_beams) | |
tempSum2 = summarize_mT5(input_2, max_length, min_length, num_beams) | |
fullSumm = tempSum1 + tempSum2 | |
return summarize_mT5(fullSumm, max_length, min_length, num_beams) | |
if(input_1 and not input_2 and input_3): #1 e 3 cheios" | |
tempSum1 = summarize_mT5(input_1, max_length, min_length, num_beams) | |
tempSum3 = summarize_mT5(input_3, max_length, min_length, num_beams) | |
fullSumm = tempSum1 + tempSum3 | |
return summarize_mT5(fullSumm, max_length, min_length, num_beams) | |
if(not input_1 and input_2 and input_3): #"2 e 3 cheios" | |
tempSum2 = summarize_mT5(input_2, max_length, min_length, num_beams) | |
tempSum3 = summarize_mT5(input_3, max_length, min_length, num_beams) | |
fullSumm = tempSum2 + tempSum3 | |
return summarize_mT5(fullSumm, max_length, min_length, num_beams) | |
if method == "Luhn": | |
if(input_1 and input_2 and input_3 ): #"3 cheios" | |
tempSum1 = summarize_Luhn(input_1) | |
tempSum2 = summarize_Luhn(input_2) | |
tempSum3 = summarize_Luhn(input_3) | |
fullSumm = tempSum1 + tempSum2 + tempSum3 | |
return summarize_Luhn(fullSumm) | |
if(input_1 and input_2 and not input_3): #"1 e 2 cheios" | |
tempSum1 = summarize_Luhn(input_1) | |
tempSum2 = summarize_Luhn(input_2) | |
fullSumm = tempSum1 + tempSum2 | |
return summarize_Luhn(fullSumm) | |
if(input_1 and not input_2 and input_3): #1 e 3 cheios" | |
tempSum1 = summarize_Luhn(input_1) | |
tempSum3 = summarize_Luhn(input_3) | |
fullSumm = tempSum1 + tempSum3 | |
return summarize_Luhn(fullSumm) | |
if(not input_1 and input_2 and input_3): #"2 e 3 cheios" | |
tempSum2 = summarize_Luhn(input_2) | |
tempSum3 = summarize_Luhn(input_3) | |
fullSumm = tempSum2 + tempSum3 | |
return summarize_Luhn(fullSumm) | |
if method == "LexRank": | |
if(input_1 and input_2 and input_3 ): #"3 cheios" | |
tempSum1 = summarize_LexRank(input_1) | |
tempSum2 = summarize_LexRank(input_2) | |
tempSum3 = summarize_LexRank(input_3) | |
fullSumm = tempSum1 + tempSum2 + tempSum3 | |
return summarize_LexRank(fullSumm) | |
if(input_1 and input_2 and not input_3): #"1 e 2 cheios" | |
tempSum1 = summarize_LexRank(input_1) | |
tempSum2 = summarize_LexRank(input_2) | |
fullSumm = tempSum1 + tempSum2 | |
return summarize_LexRank(fullSumm) | |
if(input_1 and not input_2 and input_3): #1 e 3 cheios" | |
tempSum1 = summarize_LexRank(input_1) | |
tempSum3 = summarize_LexRank(input_3) | |
fullSumm = tempSum1 + tempSum3 | |
return summarize_LexRank(fullSumm) | |
if(not input_1 and input_2 and input_3): #"2 e 3 cheios" | |
tempSum2 = summarize_LexRank(input_2) | |
tempSum3 = summarize_LexRank(input_3) | |
fullSumm = tempSum2 + tempSum3 | |
return summarize_LexRank(fullSumm) | |
if method == "TextRank": | |
if(input_1 and input_2 and input_3 ): #"3 cheios" | |
tempSum1 = summarize_TextRank(input_1) | |
tempSum2 = summarize_TextRank(input_2) | |
tempSum3 = summarize_TextRank(input_3) | |
fullSumm = tempSum1 + tempSum2 + tempSum3 | |
return summarize_TextRank(fullSumm) | |
if(input_1 and input_2 and not input_3): #"1 e 2 cheios" | |
tempSum1 = summarize_TextRank(input_1) | |
tempSum2 = summarize_TextRank(input_2) | |
fullSumm = tempSum1 + tempSum2 | |
return summarize_TextRank(fullSumm) | |
if(input_1 and not input_2 and input_3): #1 e 3 cheios" | |
tempSum1 = summarize_TextRank(input_1) | |
tempSum3 = summarize_TextRank(input_3) | |
fullSumm = tempSum1 + tempSum3 | |
return summarize_TextRank(fullSumm) | |
if(not input_1 and input_2 and input_3): #"2 e 3 cheios" | |
tempSum2 = summarize_TextRank(input_2) | |
tempSum3 = summarize_TextRank(input_3) | |
fullSumm = tempSum2 + tempSum3 | |
return summarize_TextRank(fullSumm) | |
if method == "Luhn + mT5": | |
if(input_1 and input_2 and input_3 ): #"3 cheios" | |
tempSum1 = summarize_Luhn(input_1) | |
tempSum2 = summarize_Luhn(input_2) | |
tempSum3 = summarize_Luhn(input_3) | |
fullSumm = tempSum1 + tempSum2 + tempSum3 | |
finalSum = summarize_Luhn(fullSumm) | |
return summarize_mT5(finalSum, max_length, min_length, num_beams) | |
if(input_1 and input_2 and not input_3): #"1 e 2 cheios" | |
tempSum1 = summarize_Luhn(input_1) | |
tempSum2 = summarize_Luhn(input_2) | |
fullSumm = tempSum1 + tempSum2 | |
finalSum = summarize_Luhn(fullSumm) | |
return summarize_mT5(finalSum, max_length, min_length, num_beams) | |
if(input_1 and not input_2 and input_3): #1 e 3 cheios" | |
tempSum1 = summarize_Luhn(input_1) | |
tempSum3 = summarize_Luhn(input_3) | |
fullSumm = tempSum1 + tempSum3 | |
finalSum = summarize_Luhn(fullSumm) | |
return summarize_mT5(finalSum, max_length, min_length, num_beams) | |
if(not input_1 and input_2 and input_3): #"2 e 3 cheios" | |
tempSum2 = summarize_Luhn(input_2) | |
tempSum3 = summarize_Luhn(input_3) | |
fullSumm = tempSum2 + tempSum3 | |
finalSum = summarize_Luhn(fullSumm) | |
return summarize_mT5(finalSum, max_length, min_length, num_beams) | |
if method == "LexRank + mT5": | |
if(input_1 and input_2 and input_3 ): #"3 cheios" | |
tempSum1 = summarize_LexRank(input_1) | |
tempSum2 = summarize_LexRank(input_2) | |
tempSum3 = summarize_LexRank(input_3) | |
fullSumm = tempSum1 + tempSum2 + tempSum3 | |
finalSum = summarize_LexRank(fullSumm) | |
return summarize_mT5(finalSum, max_length, min_length, num_beams) | |
if(input_1 and input_2 and not input_3): #"1 e 2 cheios" | |
tempSum1 = summarize_LexRank(input_1) | |
tempSum2 = summarize_LexRank(input_2) | |
fullSumm = tempSum1 + tempSum2 | |
finalSum = summarize_LexRank(fullSumm) | |
return summarize_mT5(finalSum, max_length, min_length, num_beams) | |
if(input_1 and not input_2 and input_3): #1 e 3 cheios" | |
tempSum1 = summarize_LexRank(input_1) | |
tempSum3 = summarize_LexRank(input_3) | |
fullSumm = tempSum1 + tempSum3 | |
finalSum = summarize_LexRank(fullSumm) | |
return summarize_mT5(finalSum, max_length, min_length, num_beams) | |
if(not input_1 and input_2 and input_3): #"2 e 3 cheios" | |
tempSum2 = summarize_LexRank(input_2) | |
tempSum3 = summarize_LexRank(input_3) | |
fullSumm = tempSum2 + tempSum3 | |
finalSum = summarize_LexRank(fullSumm) | |
return summarize_mT5(finalSum, max_length, min_length, num_beams) | |
if method == "TextRank + mT5": | |
if(input_1 and input_2 and input_3 ): #"3 cheios" | |
tempSum1 = summarize_TextRank(input_1) | |
tempSum2 = summarize_TextRank(input_2) | |
tempSum3 = summarize_TextRank(input_3) | |
fullSumm = tempSum1 + tempSum2 + tempSum3 | |
finalSum = summarize_TextRank(fullSumm) | |
return summarize_mT5(finalSum, max_length, min_length, num_beams) | |
if(input_1 and input_2 and not input_3): #"1 e 2 cheios" | |
tempSum1 = summarize_TextRank(input_1) | |
tempSum2 = summarize_TextRank(input_2) | |
fullSumm = tempSum1 + tempSum2 | |
finalSum = summarize_TextRank(fullSumm) | |
return summarize_mT5(finalSum, max_length, min_length, num_beams) | |
if(input_1 and not input_2 and input_3): #1 e 3 cheios" | |
tempSum1 = summarize_TextRank(input_1) | |
tempSum3 = summarize_TextRank(input_3) | |
fullSumm = tempSum1 + tempSum3 | |
finalSum = summarize_TextRank(fullSumm) | |
return summarize_mT5(finalSum, max_length, min_length, num_beams) | |
if(not input_1 and input_2 and input_3): #"2 e 3 cheios" | |
tempSum2 = summarize_TextRank(input_2) | |
tempSum3 = summarize_TextRank(input_3) | |
fullSumm = tempSum2 + tempSum3 | |
finalSum = summarize_TextRank(fullSumm) | |
return summarize_mT5(finalSum, max_length, min_length, num_beams) | |
return "ERROR" | |
def summarize_HUB_Monodocument(input, method, max_length, min_length, num_beams):
    """Summarize a single text with the summarization method chosen in the UI.

    Args:
        input: Text to summarize.
        method: Label of the method; the "X + mT5" labels run extractive
            summarizer X first and feed its output to mT5.
        max_length, min_length, num_beams: Forwarded to summarize_mT5.

    Returns:
        The summary, or "ERROR" when ``method`` matches no known label.
    """
    def abstractive(text):
        return summarize_mT5(text, max_length, min_length, num_beams)

    # Ordered (label, pipeline) pairs; the lambdas defer every summarizer
    # lookup until the matching pipeline is actually run.
    pipelines = (
        ("Pure mT5", abstractive),
        ("Luhn", lambda text: summarize_Luhn(text)),
        ("LexRank", lambda text: summarize_LexRank(text)),
        ("TextRank", lambda text: summarize_TextRank(text)),
        ("Luhn + mT5", lambda text: abstractive(summarize_Luhn(text))),
        ("LexRank + mT5", lambda text: abstractive(summarize_LexRank(text))),
        ("TextRank + mT5", lambda text: abstractive(summarize_TextRank(text))),
    )
    for label, run in pipelines:
        if method == label:
            return run(input)
    return "ERROR"
def summarize_Luhn(input):
    """Build an extractive summary of ``input`` with sumy's LuhnSummarizer.

    The text is parsed with the Portuguese tokenizer and the summarizer is
    asked for 3 sentences. Each returned sentence is prefixed with a single
    space, then newlines become spaces and carriage returns are dropped.
    """
    document = PlaintextParser.from_string(input, Tokenizer("portuguese")).document
    chosen = LuhnSummarizer()(document, 3)
    # The leading space before every sentence (including the first) is kept.
    flattened = "".join(" " + str(sentence) for sentence in chosen)
    return flattened.replace('\n', ' ').replace('\r', '')
def summarize_LexRank(input):
    """Build an extractive summary of ``input`` with sumy's LexRankSummarizer.

    The text is parsed with the Portuguese tokenizer and the summarizer is
    asked for 3 sentences. Each returned sentence is prefixed with a single
    space, then newlines become spaces and carriage returns are dropped.
    """
    document = PlaintextParser.from_string(input, Tokenizer("portuguese")).document
    chosen = LexRankSummarizer()(document, 3)
    # The leading space before every sentence (including the first) is kept.
    flattened = "".join(" " + str(sentence) for sentence in chosen)
    return flattened.replace('\n', ' ').replace('\r', '')
def summarize_TextRank(input):
    """Build an extractive summary of ``input`` with the spaCy TextRank pipe.

    Runs the module-level ``nlp`` pipeline on the text and asks its
    ``textrank`` extension for a summary limited to 3 sentences. Each
    returned sentence is prefixed with a single space, then newlines become
    spaces and carriage returns are dropped.
    """
    ranking = nlp(input)._.textrank
    chosen = ranking.summary(limit_sentences=3)
    # The leading space before every sentence (including the first) is kept.
    flattened = "".join(" " + str(sentence) for sentence in chosen)
    return flattened.replace('\n', ' ').replace('\r', '')
def summarize_mT5(input, max_length, min_length, num_beams):
    """Generate an abstractive summary of ``input`` with the loaded mT5 model.

    Args:
        input: Text to summarize; it is truncated/padded to 512 tokens.
        max_length: Upper bound passed to ``model.generate``.
        min_length: Lower bound passed to ``model.generate``; clamped so it
            never exceeds ``max_length``.
        num_beams: Beam count passed to ``model.generate``.

    Returns:
        The decoded summary, or "" when ``input`` is empty or only whitespace.
    """
    if not input or not input.strip():
        # Nothing to summarize: skip the model rather than decode pure padding.
        return ""

    max_length = int(max_length)
    # The UI sliders allow min_length > max_length, which generation cannot
    # satisfy; clamp instead of forwarding inconsistent bounds.
    min_length = min(int(min_length), max_length)

    encoded = tokenizer(
        input,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )

    # Single pass: the former `for i in range(0, 14)` wrapper never used its
    # index and fed the same inputs every time, so it only repeated this work.
    output_ids = model.generate(
        input_ids=encoded["input_ids"],
        # The input is padded to 512 tokens; pass the mask explicitly so the
        # padding positions are ignored.
        attention_mask=encoded["attention_mask"],
        max_length=max_length,
        min_length=min_length,
        no_repeat_ngram_size=2,
        num_beams=int(num_beams),
    )[0]

    return tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
def _method_and_generation_controls():
    """Create the method selector and the three generation sliders, in display order."""
    return [
        gr.Radio(
            ["Pure mT5", "Luhn", "LexRank", "TextRank", "Luhn + mT5", "LexRank + mT5", "TextRank + mT5"],
            label="Método",
        ),
        gr.Slider(50, 500, step=1, value=200, label="Tamanho máximo do Sumário"),
        gr.Slider(1, 125, step=1, value=50, label="Tamanho mínimo do Sumário"),
        gr.Slider(1, 10, step=1, value=4, label="Qualidade do sumário"),
    ]


# UI layout: one tab per mode, each with its text box(es), the shared controls,
# an output box and a trigger button. Nesting is reconstructed from the calls.
with app:
    gr.Markdown("Sumarização Monodocumento ou Multidocumento para o português.")
    with gr.Tabs():
        with gr.TabItem("Sumarização Monodocumento"):
            MonoInputs = [gr.Textbox(label="Texto a ser Sumarizado")]
            MonoInputs += _method_and_generation_controls()
            MonoOutputs = gr.Textbox()
            MonoButton = gr.Button("Sumarizar Texto")
        with gr.TabItem("Sumarização Multidocumento"):
            MultiInputs = [
                gr.Textbox(label="Texto 1"),
                gr.Textbox(label="Texto 2"),
                gr.Textbox(label="Texto 3"),
            ]
            MultiInputs += _method_and_generation_controls()
            MultiOutputs = gr.Textbox()
            MultiButton = gr.Button("Sumarizar Textos")

    # Input order must match the parameter order of the handler functions.
    MonoButton.click(summarize_HUB_Monodocument, inputs=MonoInputs, outputs=MonoOutputs)
    MultiButton.click(summarize_HUB_Multidocument, inputs=MultiInputs, outputs=MultiOutputs)

app.launch()