ardavey committed on
Commit
4433b13
·
verified ·
1 Parent(s): a8b47f4

create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit app: the user pastes an article and the app shows a summary
# generated by the 'ardavey/bert2gpt-indosum' encoder-decoder checkpoint.
import streamlit as st

from transformers import BertTokenizer, EncoderDecoderModel, EncoderDecoderConfig

model_ckpt = 'ardavey/bert2gpt-indosum'

# Terms to correct in the generated summary, mapped to the intended spelling.
# Applied before sentence capitalization, so keys are matched in the raw
# decoded output.
replacement_dict = {
    "optiglain": "OptiGuard",
    "telkom university": "Telkom University",
    "menyerbut": "menyebut",
}


@st.cache_resource
def load_summarizer(checkpoint):
    """Load the tokenizer and encoder-decoder model for ``checkpoint``.

    Streamlit re-executes the whole script on every user interaction, so the
    load is wrapped in ``st.cache_resource`` to build the objects once and
    reuse them across reruns.

    Args:
        checkpoint: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = BertTokenizer.from_pretrained(checkpoint)
    # Reuse the tokenizer's CLS/SEP tokens as the BOS/EOS markers.
    tokenizer.bos_token = tokenizer.cls_token
    tokenizer.eos_token = tokenizer.sep_token

    config = EncoderDecoderConfig.from_pretrained(checkpoint)
    config.early_stopping = True

    model = EncoderDecoderModel.from_pretrained(checkpoint, config=config)
    return tokenizer, model


def capitalize_sentences(text):
    """Return ``text`` with the first character of each sentence upper-cased.

    Sentences are delimited by the literal separator ``'. '``; empty segments
    are left untouched.
    """
    return '. '.join(
        sentence[:1].upper() + sentence[1:] for sentence in text.split('. ')
    )


def summarize(article, tokenizer, model):
    """Generate a raw (uncorrected) summary string for ``article``.

    Args:
        article: The text to summarize; it is truncated to 512 tokens.
        tokenizer: Tokenizer used to encode the article and decode the output.
        model: Model whose ``generate`` method produces the summary ids.

    Returns:
        The decoded summary with special tokens removed.
    """
    input_ids = tokenizer.encode(
        article,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=512,
    )
    # Deterministic beam search. ``top_k`` is intentionally not passed: it is
    # a sampling option and has no effect while ``do_sample`` is False.
    summary_ids = model.generate(
        input_ids,
        min_length=40,
        max_length=200,
        num_beams=10,
        repetition_penalty=2.0,
        length_penalty=1.0,
        no_repeat_ngram_size=3,
        use_cache=True,
        do_sample=False,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


tokenizer, model = load_summarizer(model_ckpt)

# ``st.text_area`` returns the typed string ('' while empty). The previous
# ``st.text`` call only rendered a label and never captured any input.
text = st.text_area('Enter an article to summarize:')

# Skip generation for empty or whitespace-only input.
if text and text.strip():
    summary_text = summarize(text, tokenizer, model)

    # Correct any wrong terms using the replacement_dict.
    for wrong_term, correct_term in replacement_dict.items():
        summary_text = summary_text.replace(wrong_term, correct_term)

    summary_text = capitalize_sentences(summary_text)
    st.info(summary_text)