from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    BertTokenizer,
    EncoderDecoderModel,
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
)
import gradio as gr
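# Load the pretrained Indonesian models from the Hugging Face Hub: a T5
# summarizer, a BERT2BERT summarizer, and a T5 paraphraser.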
tokenizer_t5 = T5Tokenizer.from_pretrained("panggi/t5-base-indonesian-summarization-cased")
model_t5 = T5ForConditionalGeneration.from_pretrained("panggi/t5-base-indonesian-summarization-cased")
tokenizer_bert = BertTokenizer.from_pretrained("cahya/bert2bert-indonesian-summarization")
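# BERT has no dedicated BOS/EOS tokens, so reuse [CLS]/[SEP] for generation.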
tokenizer_bert.bos_token = tokenizer_bert.cls_token
tokenizer_bert.eos_token = tokenizer_bert.sep_token
model_bert = EncoderDecoderModel.from_pretrained("cahya/bert2bert-indonesian-summarization")
t5_para_tokenizer = AutoTokenizer.from_pretrained("Wikidepia/IndoT5-base-paraphrase")
t5_para_model = AutoModelForSeq2SeqLM.from_pretrained("Wikidepia/IndoT5-base-paraphrase")
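# Summarize Indonesian text with the T5 model, using beam search decoding.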
def summ_t5(text):
    input_ids = tokenizer_t5.encode(text, return_tensors="pt")
    summary_ids = model_t5.generate(
        input_ids,
        max_length=100,
        num_beams=2,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
        no_repeat_ngram_size=2,
        use_cache=True,
    )
    summary_text = tokenizer_t5.decode(summary_ids[0], skip_special_tokens=True)
    return summary_text
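# Summarize with the BERT2BERT encoder-decoder model, using a wider beam.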
def summ_bert(text):
    input_ids = tokenizer_bert.encode(text, return_tensors="pt")
    summary_ids = model_bert.generate(
        input_ids,
        max_length=100,
        num_beams=10,
        repetition_penalty=2.5,
        length_penalty=1.0,
        early_stopping=True,
        no_repeat_ngram_size=2,
        use_cache=True,
    )
    summary_text = tokenizer_bert.decode(summary_ids[0], skip_special_tokens=True)
    return summary_text
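# Paraphrase the input with top-k/top-p sampling, returning several candidates.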
def para_t5(text):
    encoding = t5_para_tokenizer(text, padding="longest", return_tensors="pt")
    # early_stopping is dropped here: it only applies to beam search and is
    # ignored when do_sample=True.
    outputs = t5_para_model.generate(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        max_length=100,
        do_sample=True,
        top_k=120,
        top_p=0.95,
        num_return_sequences=5,
    )
    return [
        t5_para_tokenizer.decode(
            output, skip_special_tokens=True, clean_up_tokenization_spaces=True
        )
        for output in outputs
    ]
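# Run both summarizers, then paraphrase the T5 summary.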
def summarize(text):
    t5_ = summ_t5(text)
    bert_ = summ_bert(text)
    # para_t5 returns a list of candidates; join them so one Textbox can show them
    para_ = "\n".join(para_t5(t5_))
    return t5_, bert_, para_
if __name__ == "__main__":
    with gr.Blocks() as demo:
        gr.Markdown("""<h1 style="text-align:center">Summary of Summarizer - Indonesia</h1>""")
        gr.Markdown(
            """
            Creator: wiraindrak
            """
        )
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input Text")
                analyze_button = gr.Button("Analyze")
            with gr.Column():
                t5_output = gr.Textbox(label="T5 Base Output")
                bert_output = gr.Textbox(label="Bert2Bert Base Output")
                para_output = gr.Textbox(label="T5 Paraphrase Output")
        analyze_button.click(summarize, inputs=input_text, outputs=[t5_output, bert_output, para_output])
    demo.launch()
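# When running locally, demo.launch(share=True) would expose a temporary
# public link; on Hugging Face Spaces the plain launch() above is enough.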