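"""Gradio demo that summarizes multilingual text with the csebuetnlp/mT5_multilingual_XLSum model."""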
import re

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
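
# Collapse newlines and repeated whitespace into single spaces before tokenizing.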
WHITESPACE_HANDLER = lambda k: re.sub(r'\s+', ' ', re.sub(r'\n+', ' ', k.strip()))
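
# Load the XLSum-finetuned mT5 model and its tokenizer (use_fast=False selects the SentencePiece-based slow tokenizer).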
model_name = "csebuetnlp/mT5_multilingual_XLSum"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)


def generate_summary(text):
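    """Summarize the given text and return the decoded summary."""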
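    # Clean the input and tokenize it, padding/truncating to 512 tokens.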
    input_ids = tokenizer(
        [WHITESPACE_HANDLER(text)],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )["input_ids"]
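
    # Beam-search generation (4 beams); no_repeat_ngram_size=2 discourages repeated phrases.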
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=1024,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]
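
    # Decode the generated ids back to text, dropping special tokens.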
    summary = tokenizer.decode(
        output_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )

    return summary
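

# Gradio UI: a large input textbox (placeholder "Ingrese Texto" = "Enter text") and a smaller output box for the summary.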
demo = gr.Interface(
    fn=generate_summary,
    inputs=gr.Textbox(lines=100, placeholder="Ingrese Texto"),
    outputs=gr.Textbox(lines=10),
)
demo.launch()