File size: 2,243 Bytes
d553b61
0fd3cfb
4b9ceb2
 
 
0fd3cfb
 
 
3995371
1abf4af
 
3995371
 
0fd3cfb
d8843f7
 
 
 
 
 
 
 
 
 
 
3995371
 
ac78664
3995371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d8843f7
3995371
0fd3cfb
 
d8843f7
 
3995371
 
 
 
 
0fd3cfb
 
3995371
0fd3cfb
c7ea66e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from transformers import T5Tokenizer, T5Model, T5ForConditionalGeneration, BertTokenizer, EncoderDecoderModel

import gradio as gr
from gradio.mix import Parallel

# --- Model setup (runs at import time; downloads weights on first use) ---

# Indonesian T5-base summarizer: tokenizer + seq2seq generation head.
tokenizer_t5 = T5Tokenizer.from_pretrained("panggi/t5-base-indonesian-summarization-cased")
model_t5 = T5ForConditionalGeneration.from_pretrained("panggi/t5-base-indonesian-summarization-cased")

# Indonesian BERT2BERT summarizer. BERT has no native BOS/EOS tokens, so the
# encoder-decoder setup reuses [CLS]/[SEP] as sequence start/end markers —
# generate() needs these set to know where decoding begins and ends.
tokenizer_bert = BertTokenizer.from_pretrained("cahya/bert2bert-indonesian-summarization")
tokenizer_bert.bos_token = tokenizer_bert.cls_token
tokenizer_bert.eos_token = tokenizer_bert.sep_token
model_bert = EncoderDecoderModel.from_pretrained("cahya/bert2bert-indonesian-summarization")

def summ_t5(text):
    """Summarize Indonesian article text with the T5-base model.

    Parameters
    ----------
    text : str
        Article text to summarize.

    Returns
    -------
    str
        Generated summary, special tokens stripped.
    """
    # Truncate to the encoder's 512-token limit; without this, long articles
    # overflow the model's maximum input length at generation time.
    input_ids = tokenizer_t5.encode(
        text,
        return_tensors='pt',
        max_length=512,
        truncation=True,
    )
    summary_ids = model_t5.generate(
        input_ids,
        max_length=100,
        num_beams=2,
        repetition_penalty=2.5,   # discourage copying the same phrase
        length_penalty=1.0,
        early_stopping=True,
        no_repeat_ngram_size=2,   # forbid repeated bigrams in the output
        use_cache=True,
    )
    # Batch size is 1, so decode the single generated sequence.
    return tokenizer_t5.decode(summary_ids[0], skip_special_tokens=True)
    
def summ_bert(text):
    """Summarize Indonesian article text with the BERT2BERT model.

    Parameters
    ----------
    text : str
        Article text to summarize.

    Returns
    -------
    str
        Generated summary, special tokens stripped. Note: sampling is
        enabled (do_sample=True), so repeated calls with the same input
        may return different summaries.
    """
    # Truncate to BERT's 512-token input limit; without this, long articles
    # exceed the encoder's maximum position embeddings and fail.
    input_ids = tokenizer_bert.encode(
        text,
        return_tensors='pt',
        max_length=512,
        truncation=True,
    )
    summary_ids = model_bert.generate(
        input_ids,
        min_length=20,
        max_length=100,
        num_beams=10,
        repetition_penalty=2.5,   # discourage copying the same phrase
        length_penalty=1.0,
        early_stopping=True,
        no_repeat_ngram_size=2,   # forbid repeated bigrams in the output
        use_cache=True,
        # Beam-sampling: stochastic decoding within each beam.
        do_sample=True,
        temperature=0.8,
        top_k=50,
        top_p=0.95,
    )
    # Batch size is 1, so decode the single generated sequence.
    return tokenizer_bert.decode(summary_ids[0], skip_special_tokens=True)

# Build one Gradio interface per summarizer; Parallel feeds both the same
# input text and shows their outputs side by side for comparison.
t5_demo = gr.Interface(
    fn=summ_t5,
    inputs="text",
    outputs=gr.Textbox(lines=10, label="T5 Base Output"),
)

bert_demo = gr.Interface(
    fn=summ_bert,
    inputs="text",
    outputs=gr.Textbox(lines=10, label="Bert2Bert Base Output"),
)

if __name__ == "__main__":
    shared_input = gr.Textbox(
        lines=10,
        label="Input Text",
        placeholder="Enter article here...",
    )
    combined = Parallel(
        t5_demo,
        bert_demo,
        inputs=shared_input,
        title="Summary of Summarizer - Indonesia",
    )
    combined.launch()