File size: 2,417 Bytes
0b5e09e
4433b13
 
5f6d6ed
4433b13
 
 
 
 
 
 
 
5f6d6ed
 
 
 
4433b13
5f6d6ed
4433b13
5f6d6ed
f20513c
5f6d6ed
f20513c
4433b13
f20513c
 
 
 
 
 
 
 
 
 
4433b13
 
 
5f6d6ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2349d76
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
### STREAMLIT APP ####
import streamlit as st
from transformers import BertTokenizer, EncoderDecoderModel, EncoderDecoderConfig

model_ckpt = 'ardavey/bert2gpt-indosum'


@st.cache_resource
def load_summarizer(checkpoint):
    """Load the tokenizer and encoder-decoder model for *checkpoint*.

    Wrapped in ``st.cache_resource`` so the (expensive) load happens once per
    server process instead of on every Streamlit script rerun.

    Args:
        checkpoint: Hugging Face model id or local path to load from.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        Exception: whatever ``from_pretrained`` raises when the tokenizer,
            config, or weights cannot be loaded.
    """
    loaded_tokenizer = BertTokenizer.from_pretrained(checkpoint)
    # Reuse BERT's [CLS]/[SEP] tokens as the BOS/EOS markers.
    loaded_tokenizer.bos_token = loaded_tokenizer.cls_token
    loaded_tokenizer.eos_token = loaded_tokenizer.sep_token

    config = EncoderDecoderConfig.from_pretrained(checkpoint)
    config.early_stopping = True

    loaded_model = EncoderDecoderModel.from_pretrained(checkpoint, config=config)
    return loaded_tokenizer, loaded_model


try:
    tokenizer, model = load_summarizer(model_ckpt)
except Exception as e:
    st.error(f"An error occurred while loading the model: {e}")
    # Without a model nothing below can work; halt this run instead of
    # continuing into a NameError on `model`.
    st.stop()

def capitalize_sentences(text):
    """Return *text* with the first letter of every sentence upper-cased.

    A sentence start is the first word character of the text (leading
    whitespace is skipped) or the first word character that follows ``.``,
    ``!`` or ``?`` plus at least one whitespace character. All other
    characters are returned unchanged.
    """
    import re

    return re.sub(
        r'(^\s*|[.!?]\s+)(\w)',
        lambda match: match.group(1) + match.group(2).upper(),
        text,
    )


# Terms the model is known to misspell, mapped to their corrected form.
replacement_dict = {
    "optiglain": "OptiGuard",
    "telkom university": "Telkom University",
    "menyerbut": "menyebut"
}

text = st.text_area('Enter an article to summarize:')

# Skip empty and whitespace-only input: there is nothing to summarize.
if text and text.strip():
    # Call the tokenizer directly (rather than .encode) so the attention mask
    # is available for generate(); input is truncated to 512 tokens.
    encoded = tokenizer(text, return_tensors='pt', truncation=True, max_length=512)

    # Deterministic beam search. Sampling-only knobs such as top_k are left
    # out because do_sample=False.
    summary_ids = model.generate(
        encoded['input_ids'],
        attention_mask=encoded['attention_mask'],
        min_length=40,
        max_length=200,
        num_beams=10,
        repetition_penalty=2.0,
        length_penalty=1.0,
        no_repeat_ngram_size=3,
        use_cache=True,
        do_sample=False,
    )

    summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Check if summary is empty
    if not summary_text.strip():
        st.warning("The model couldn't generate a summary.")
    else:
        # Correct known wrong terms first, then fix sentence capitalization.
        for wrong_term, correct_term in replacement_dict.items():
            summary_text = summary_text.replace(wrong_term, correct_term)

        summary_text = capitalize_sentences(summary_text)

        st.subheader("Generated Summary:")
        st.write(summary_text)