# GiordanoB's picture / Update app.py / 59289ee  (page residue kept as a comment)
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import torch
import spacy
import pytextrank
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
import nltk
# spaCy pipeline for Portuguese; summarize_TextRank runs its input through it.
nlp = spacy.load('pt_core_news_sm')
# Download NLTK's 'punkt' resource at import time
# (presumably required by the sumy Tokenizer("portuguese") calls below - confirm).
nltk.download('punkt')
# Add the "textrank" component to the pipeline so that `doc._.textrank` is
# available (presumably registered by the `import pytextrank` above - confirm).
nlp.add_pipe("textrank")
# Disabled whitespace-normalising helper kept for reference; re-enabling it
# would also need `import re`, which this file does not import.
#WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
# Checkpoint identifier shared by the tokenizer and the seq2seq model below.
model_name="GiordanoB/mT5_multilingual_XLSum-sumarizacao-PTBR"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Root Gradio Blocks container; it is populated in the `with app:` section
# further down and launched on the last line of the file.
app = gr.Blocks()
# Maps each UI method label to (first-stage summariser key, refine-with-mT5 flag).
_MULTIDOC_PIPELINES = {
    "Pure mT5": ("mT5", False),
    "Luhn": ("Luhn", False),
    "LexRank": ("LexRank", False),
    "TextRank": ("TextRank", False),
    "Luhn + mT5": ("Luhn", True),
    "LexRank + mT5": ("LexRank", True),
    "TextRank + mT5": ("TextRank", True),
}


def summarize_HUB_Multidocument(input_1, input_2, input_3, method, max_length, min_length, num_beams):
    """Summarise two or three documents into a single summary.

    Every non-empty document is summarised on its own with the summariser
    selected by ``method``; the partial summaries are joined and summarised
    again with the same summariser. For the ``"... + mT5"`` methods the result
    of that second pass is finally rewritten by ``summarize_mT5``.

    Args:
        input_1, input_2, input_3: Document texts; empty (falsy) ones are skipped.
        method: One of the labels in ``_MULTIDOC_PIPELINES``.
        max_length, min_length, num_beams: Forwarded to ``summarize_mT5``
            whenever the mT5 model takes part in the selected method.

    Returns:
        The combined summary; a Portuguese hint pointing to the single-document
        tab when exactly one document was supplied; or ``"ERROR"`` when no
        document was supplied or ``method`` is not recognised.
    """
    documents = [text for text in (input_1, input_2, input_3) if text]

    if len(documents) == 1:
        return "Por favor utilize a aba de sumarização monodocumento"

    pipeline = _MULTIDOC_PIPELINES.get(method) if isinstance(method, str) else None
    if not documents or pipeline is None:
        return "ERROR"
    stage_name, refine_with_mt5 = pipeline

    def run_mt5(text):
        return summarize_mT5(text, max_length, min_length, num_beams)

    # Resolved here (not at import time) so the summarisers may be defined
    # later in the module.
    stages = {
        "mT5": run_mt5,
        "Luhn": summarize_Luhn,
        "LexRank": summarize_LexRank,
        "TextRank": summarize_TextRank,
    }
    summarise = stages[stage_name]

    partial_summaries = [summarise(text) for text in documents]
    # Join with an explicit space: mT5 output carries no leading whitespace, so
    # plain concatenation used to fuse the last word of one partial summary
    # with the first word of the next before the second pass.
    combined = " ".join(part.strip() for part in partial_summaries)

    merged = summarise(combined)
    return run_mt5(merged) if refine_with_mt5 else merged
def summarize_HUB_Monodocument(input, method, max_length, min_length, num_beams):
    """Summarise a single document with the method chosen in the UI.

    ``"Pure mT5"`` sends the text straight to ``summarize_mT5``. The extractive
    labels (``"Luhn"``, ``"LexRank"``, ``"TextRank"``) return the extractive
    summary as is, and their ``"... + mT5"`` variants pass that extractive
    summary through ``summarize_mT5``. Any other ``method`` yields ``"ERROR"``.

    ``max_length``, ``min_length`` and ``num_beams`` are only used when mT5
    takes part in the selected method.
    """
    def abstractive(text):
        return summarize_mT5(text, max_length, min_length, num_beams)

    if method == "Pure mT5":
        return abstractive(input)

    # (plain label, hybrid label, extractive summariser); the lambdas defer the
    # lookup of the summariser functions until one is actually needed.
    extractive_methods = (
        ("Luhn", "Luhn + mT5", lambda text: summarize_Luhn(text)),
        ("LexRank", "LexRank + mT5", lambda text: summarize_LexRank(text)),
        ("TextRank", "TextRank + mT5", lambda text: summarize_TextRank(text)),
    )
    for plain_label, hybrid_label, extract in extractive_methods:
        if method == plain_label:
            return extract(input)
        if method == hybrid_label:
            return abstractive(extract(input))

    return "ERROR"
def summarize_Luhn(input):
    """Return an extractive summary of ``input`` produced by ``LuhnSummarizer``.

    The text is parsed with ``Tokenizer("portuguese")`` and the summarizer is
    asked for 3 sentences. Each returned sentence is prefixed with one space,
    then line feeds are turned into spaces and carriage returns are dropped.
    """
    luhn = LuhnSummarizer()
    document = PlaintextParser.from_string(input, Tokenizer("portuguese")).document
    selected = luhn(document, 3)
    flattened = ''.join(' ' + str(sentence) for sentence in selected)
    return flattened.replace('\n', ' ').replace('\r', '')
def summarize_LexRank(input):
    """Return an extractive summary of ``input`` produced by ``LexRankSummarizer``.

    The text is parsed with ``Tokenizer("portuguese")`` and the summarizer is
    asked for 3 sentences. Each returned sentence is prefixed with one space,
    then line feeds are turned into spaces and carriage returns are dropped.
    """
    lex_rank = LexRankSummarizer()
    document = PlaintextParser.from_string(input, Tokenizer("portuguese")).document
    selected = lex_rank(document, 3)
    flattened = ''.join(' ' + str(sentence) for sentence in selected)
    return flattened.replace('\n', ' ').replace('\r', '')
def summarize_TextRank(input):
    """Return an extractive summary of ``input`` built from ``doc._.textrank``.

    The text is run through the module-level ``nlp`` pipeline and the
    ``textrank`` extension is asked for a summary with ``limit_sentences=3``.
    Each returned sentence is prefixed with one space, then line feeds are
    turned into spaces and carriage returns are dropped.
    """
    textrank = nlp(input)._.textrank
    pieces = [' ' + str(sent) for sent in textrank.summary(limit_sentences=3)]
    return ''.join(pieces).replace('\n', ' ').replace('\r', '')
def summarize_mT5(input, max_length, min_length, num_beams):
    """Generate an abstractive summary of ``input`` with the module-level model.

    Args:
        input: Text to summarise. It is tokenised with truncation and padding
            to 512 tokens, so anything beyond that is not seen by the model.
        max_length: ``max_length`` passed to ``model.generate``.
        min_length: ``min_length`` passed to ``model.generate``.
        num_beams: ``num_beams`` passed to ``model.generate``.

    Returns:
        The decoded first generated sequence, with special tokens removed.
    """
    # The previous version wrapped this work in `for i in range(0, 14)` without
    # ever using `i`: every pass received the same arguments and only the last
    # `response` was returned, so the extra passes were wasted compute.
    # One pass is enough.
    input_ids = tokenizer(
        input,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )["input_ids"]

    output_ids = model.generate(
        input_ids=input_ids,
        max_length=max_length,
        min_length=min_length,
        no_repeat_ngram_size=2,
        num_beams=num_beams,
    )[0]

    return tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
def _summary_controls():
    """Create the method selector and the three sliders shared by both tabs.

    Must be called inside the tab where the components should appear; the
    components are created in the order they are returned.
    """
    method_picker = gr.Radio(
        ["Pure mT5", "Luhn", "LexRank", "TextRank", "Luhn + mT5", "LexRank + mT5", "TextRank + mT5"],
        label="Método",
    )
    max_len_slider = gr.Slider(50, 500, step=1, value=200, label="Tamanho máximo do Sumário")
    min_len_slider = gr.Slider(1, 125, step=1, value=50, label="Tamanho mínimo do Sumário")
    beams_slider = gr.Slider(1, 10, step=1, value=4, label="Qualidade do sumário")
    return [method_picker, max_len_slider, min_len_slider, beams_slider]


# Build the two-tab interface: one tab per summarisation mode.
with app:
    gr.Markdown("Sumarização Monodocumento ou Multidocumento para o português.")
    with gr.Tabs():
        with gr.TabItem("Sumarização Monodocumento"):
            # Input order matches the parameters of summarize_HUB_Monodocument.
            MonoInputs = [
                gr.Textbox(label="Texto a ser Sumarizado"),
                *_summary_controls(),
            ]
            MonoOutputs = gr.Textbox()
            MonoButton = gr.Button("Sumarizar Texto")
        with gr.TabItem("Sumarização Multidocumento"):
            # Input order matches the parameters of summarize_HUB_Multidocument.
            MultiInputs = [
                gr.Textbox(label="Texto 1"),
                gr.Textbox(label="Texto 2"),
                gr.Textbox(label="Texto 3"),
                *_summary_controls(),
            ]
            MultiOutputs = gr.Textbox()
            MultiButton = gr.Button("Sumarizar Textos")
    # Wire each button to its handler while still inside the Blocks context.
    MonoButton.click(summarize_HUB_Monodocument, inputs=MonoInputs, outputs=MonoOutputs)
    MultiButton.click(summarize_HUB_Multidocument, inputs=MultiInputs, outputs=MultiOutputs)

app.launch()