ardavey committed on
Commit
5f6d6ed
·
verified ·
1 Parent(s): f5c457d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -26
app.py CHANGED
@@ -1,7 +1,7 @@
1
  ### STREAMLIT APP ####
2
  import streamlit as st
3
-
4
  from transformers import BertTokenizer, EncoderDecoderModel, EncoderDecoderConfig
 
5
  model_ckpt = 'ardavey/bert2gpt-indosum'
6
  tokenizer = BertTokenizer.from_pretrained(model_ckpt)
7
  tokenizer.bos_token = tokenizer.cls_token
@@ -10,12 +10,15 @@ tokenizer.eos_token = tokenizer.sep_token
10
  config = EncoderDecoderConfig.from_pretrained(model_ckpt)
11
  config.early_stopping = True
12
 
13
- model = EncoderDecoderModel.from_pretrained(model_ckpt, config=config)
 
 
 
14
 
15
- text = st.text('Enter an article to summarize:')
16
 
17
- if text:
18
- input_ids = tokenizer.encode(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
19
  summary_ids = model.generate(input_ids,
20
  min_length=40,
21
  max_length=200,
@@ -24,28 +27,31 @@ if text:
24
  length_penalty=1.0,
25
  no_repeat_ngram_size=3,
26
  use_cache=True,
27
- do_sample = False,
28
- top_k = 50,
29
  )
30
 
31
  summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
32
- # capitalize the first letter of the summary and after each period
33
- def capitalize_sentences(text):
34
- sentences = text.split('. ')
35
- capitalized_sentences = [sentence[0].upper() + sentence[1:] if sentence else sentence for sentence in sentences]
36
- return '. '.join(capitalized_sentences)
37
-
38
- # correct any wrong terms using the replacement_dict
39
- replacement_dict = {
40
- "optiglain": "OptiGuard",
41
- "telkom university": "Telkom University",
42
- "menyerbut": "menyebut"
43
- }
44
 
45
- for wrong_term, correct_term in replacement_dict.items():
46
- summary_text = summary_text.replace(wrong_term, correct_term)
47
-
48
- summary_text = capitalize_sentences(summary_text)
49
- st.info(summary_text)
50
-
51
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ### STREAMLIT APP ####
2
  import streamlit as st
 
3
  from transformers import BertTokenizer, EncoderDecoderModel, EncoderDecoderConfig
4
+
5
  model_ckpt = 'ardavey/bert2gpt-indosum'
6
  tokenizer = BertTokenizer.from_pretrained(model_ckpt)
7
  tokenizer.bos_token = tokenizer.cls_token
 
10
  config = EncoderDecoderConfig.from_pretrained(model_ckpt)
11
  config.early_stopping = True
12
 
13
+ try:
14
+ model = EncoderDecoderModel.from_pretrained(model_ckpt, config=config)
15
+ except Exception as e:
16
+ st.error(f"An error occurred while loading the model: {e}")
17
 
18
+ text = st.text_area('Enter an article to summarize:')
19
 
20
+ if text and len(text) > 0:
21
+ input_ids = tokenizer.encode(text, return_tensors='pt', padding='longest', truncation=True, max_length=512)
22
  summary_ids = model.generate(input_ids,
23
  min_length=40,
24
  max_length=200,
 
27
  length_penalty=1.0,
28
  no_repeat_ngram_size=3,
29
  use_cache=True,
30
+ do_sample=False,
31
+ top_k=50,
32
  )
33
 
34
  summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
+ # Check if summary is empty
37
+ if not summary_text.strip():
38
+ st.warning("The model couldn't generate a summary.")
39
+ else:
40
+ # Capitalize the first letter of the summary and after each period
41
+ def capitalize_sentences(text):
42
+ sentences = text.split('. ')
43
+ capitalized_sentences = [sentence[0].upper() + sentence[1:] if sentence else sentence for sentence in sentences]
44
+ return '. '.join(capitalized_sentences)
45
+
46
+ # Correct any wrong terms using the replacement_dict
47
+ replacement_dict = {
48
+ "optiglain": "OptiGuard",
49
+ "telkom university": "Telkom University",
50
+ "menyerbut": "menyebut"
51
+ }
52
+
53
+ for wrong_term, correct_term in replacement_dict.items():
54
+ summary_text = summary_text.replace(wrong_term, correct_term)
55
+
56
+ summary_text = capitalize_sentences(summary_text)
57
+ st.info(summary_text)