wiraindrak committed on
Commit 3995371 · 1 Parent(s): d8843f7

Update app.py

Files changed (1): app.py (+30 -2)
app.py CHANGED
@@ -6,6 +6,11 @@ from gradio.mix import Parallel
  tokenizer_t5 = T5Tokenizer.from_pretrained("panggi/t5-base-indonesian-summarization-cased")
  model_t5 = T5ForConditionalGeneration.from_pretrained("panggi/t5-base-indonesian-summarization-cased")
 
+ tokenizer_bert = BertTokenizer.from_pretrained("cahya/bert2bert-indonesian-summarization")
+ tokenizer_bert.bos_token = tokenizer_bert.cls_token
+ tokenizer_bert.eos_token = tokenizer_bert.sep_token
+ model_bert = EncoderDecoderModel.from_pretrained("cahya/bert2bert-indonesian-summarization")
+
  def summ_t5(text):
      input_ids = tokenizer_t5.encode(text, return_tensors='pt')
      summary_ids = model_t5.generate(input_ids,
@@ -18,14 +23,37 @@ def summ_t5(text):
                                      use_cache=True)
      summary_text = tokenizer_t5.decode(summary_ids[0], skip_special_tokens=True)
      return summary_text
+
+ def summ_bert(text):
+     input_ids = tokenizer_bert.encode(text, return_tensors='pt')
+     summary_ids = model_bert.generate(input_ids,
+                                       min_length=20,
+                                       max_length=100,
+                                       num_beams=10,
+                                       repetition_penalty=2.5,
+                                       length_penalty=1.0,
+                                       early_stopping=True,
+                                       no_repeat_ngram_size=2,
+                                       use_cache=True,
+                                       do_sample=True,
+                                       temperature=0.8,
+                                       top_k=50,
+                                       top_p=0.95)
+     summary_text = tokenizer_bert.decode(summary_ids[0], skip_special_tokens=True)
+     return summary_text
 
- summ_demo = gr.Interface(
+ t5_demo = gr.Interface(
      fn=summ_t5,
      inputs="text",
      outputs=gr.Textbox(lines=10, label="T5 Base Output")
  )
+ bert_demo = gr.Interface(
+     fn=summ_bert,
+     inputs="text",
+     outputs=gr.Textbox(lines=10, label="Bert2Bert Base Output")
+ )
 
  if __name__ == "__main__":
-     Parallel(summ_demo,
+     Parallel(t5_demo, bert_demo,
          inputs=gr.Textbox(lines=10, label="Input Text", placeholder="Enter article here..."),
          title="Summary of Summarizer - Indonesia").launch()