ardavey committed on
Commit
3307bd7
·
verified ·
1 Parent(s): 839e239

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -50
app.py CHANGED
@@ -1,50 +0,0 @@
"""Streamlit app that summarizes a user-supplied article.

The summary is generated by the ``ardavey/bert2gpt-indosum`` encoder-decoder
checkpoint, then cleaned up by replacing known wrong terms and capitalizing
the first letter of each sentence.
"""

model_ckpt = 'ardavey/bert2gpt-indosum'

# Terms the model is known to get wrong, mapped to their corrected form.
replacement_dict = {
    "optiglain": "OptiGuard",
    "telkom university": "Telkom University",
    "menyerbut": "menyebut",
}


def capitalize_sentences(text):
    """Return *text* with the first character of each sentence upper-cased.

    Sentences are the pieces obtained by splitting on ``'. '``; empty pieces
    are left untouched.
    """
    return '. '.join(
        sentence[:1].upper() + sentence[1:] for sentence in text.split('. ')
    )


def correct_terms(text, replacements=None):
    """Return *text* with every wrong term replaced by its correction.

    Args:
        text: The string to clean up.
        replacements: Mapping of wrong term -> correct term. Defaults to the
            module-level ``replacement_dict``.
    """
    if replacements is None:
        replacements = replacement_dict
    for wrong_term, correct_term in replacements.items():
        text = text.replace(wrong_term, correct_term)
    return text


def postprocess_summary(summary_text):
    """Apply term corrections, then sentence capitalization, to a summary."""
    return capitalize_sentences(correct_terms(summary_text))


def main():
    """Render the input box and, when an article is entered, show its summary."""
    # Imported here so the pure text helpers above can be imported and tested
    # without the UI and model dependencies installed.
    import streamlit as st
    from transformers import BertTokenizer, EncoderDecoderConfig, EncoderDecoderModel

    tokenizer = BertTokenizer.from_pretrained(model_ckpt)
    tokenizer.bos_token = tokenizer.cls_token
    tokenizer.eos_token = tokenizer.sep_token

    config = EncoderDecoderConfig.from_pretrained(model_ckpt)
    config.early_stopping = True

    model = EncoderDecoderModel.from_pretrained(model_ckpt, config=config)

    # st.text_area collects user input; st.text only displays static text.
    text = st.text_area('Enter an article to summarize:')

    # Nothing to summarize until the user has typed a non-blank article.
    if not text or not text.strip():
        return

    input_ids = tokenizer.encode(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=512,
    )
    summary_ids = model.generate(
        input_ids,
        min_length=40,
        max_length=200,
        num_beams=10,
        repetition_penalty=2.0,
        length_penalty=1.0,
        no_repeat_ngram_size=3,
        use_cache=True,
        do_sample=False,
        top_k=50,
    )

    summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    st.info(postprocess_summary(summary_text))


if __name__ == "__main__":
    main()