# Hugging Face Hub file-viewer residue (not code) — preserved as comments:
# wiraindrak's picture
# Update app.py
# ac78664
# raw
# history blame
# 2.24 kB
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, BertTokenizer, EncoderDecoderModel
import gradio as gr
from gradio.mix import Parallel
# T5-base Indonesian summarizer (panggi): tokenizer + seq2seq generation model.
tokenizer_t5 = T5Tokenizer.from_pretrained("panggi/t5-base-indonesian-summarization-cased")
model_t5 = T5ForConditionalGeneration.from_pretrained("panggi/t5-base-indonesian-summarization-cased")
# Bert2Bert Indonesian summarizer (cahya). BERT tokenizers have no native
# BOS/EOS tokens, so [CLS]/[SEP] are reused as generation start/end markers.
tokenizer_bert = BertTokenizer.from_pretrained("cahya/bert2bert-indonesian-summarization")
tokenizer_bert.bos_token = tokenizer_bert.cls_token
tokenizer_bert.eos_token = tokenizer_bert.sep_token
model_bert = EncoderDecoderModel.from_pretrained("cahya/bert2bert-indonesian-summarization")
def summ_t5(text):
    """Summarize Indonesian *text* with the T5-base summarization model.

    Args:
        text: Article text to summarize (plain string).

    Returns:
        The decoded summary string, with special tokens stripped.
    """
    # NOTE: body indentation was missing in the original file (SyntaxError);
    # restored here without changing any statement.
    input_ids = tokenizer_t5.encode(text, return_tensors='pt')
    # Beam search (2 beams) with repetition controls keeps the summary
    # short (<=100 tokens) and non-repetitive.
    summary_ids = model_t5.generate(
        input_ids,
        max_length=100,
        num_beams=2,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
        no_repeat_ngram_size=2,
        use_cache=True,
    )
    summary_text = tokenizer_t5.decode(summary_ids[0], skip_special_tokens=True)
    return summary_text
def summ_bert(text):
    """Summarize Indonesian *text* with the Bert2Bert summarization model.

    Args:
        text: Article text to summarize (plain string).

    Returns:
        The decoded summary string, with special tokens stripped.
    """
    # NOTE: body indentation was missing in the original file (SyntaxError);
    # restored here without changing any statement.
    input_ids = tokenizer_bert.encode(text, return_tensors='pt')
    # Beam search (10 beams) bounded to 20-100 tokens with repetition controls.
    # NOTE(review): do_sample=True makes output non-deterministic across calls;
    # presumably intentional for variety — confirm, or drop sampling for
    # reproducible summaries.
    summary_ids = model_bert.generate(
        input_ids,
        min_length=20,
        max_length=100,
        num_beams=10,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
        no_repeat_ngram_size=2,
        use_cache=True,
        do_sample=True,
        temperature=0.8,
        top_k=50,
        top_p=0.95,
    )
    summary_text = tokenizer_bert.decode(summary_ids[0], skip_special_tokens=True)
    return summary_text
# One Gradio interface per model; both take raw text and show a 10-line
# textbox with the generated summary. They are combined side-by-side by
# the Parallel(...) call in the __main__ guard.
t5_demo = gr.Interface(
fn=summ_t5,
inputs="text",
outputs=gr.Textbox(lines=10, label="T5 Base Output")
)
bert_demo = gr.Interface(
fn=summ_bert,
inputs="text",
outputs=gr.Textbox(lines=10, label="Bert2Bert Base Output")
)
if __name__ == "__main__":
    # NOTE: the body of this guard was not indented in the original file
    # (SyntaxError); restored here without changing any statement.
    # NOTE(review): gradio.mix.Parallel was removed in Gradio 4.x — this
    # script requires an older Gradio release; confirm the pinned version.
    Parallel(
        t5_demo,
        bert_demo,
        inputs=gr.Textbox(lines=10, label="Input Text", placeholder="Enter article here..."),
        title="Summary of Summarizer - Indonesia",
    ).launch()