Spaces:
Sleeping
Sleeping
File size: 2,417 Bytes
0b5e09e 4433b13 5f6d6ed 4433b13 5f6d6ed 4433b13 5f6d6ed 4433b13 5f6d6ed f20513c 5f6d6ed f20513c 4433b13 f20513c 4433b13 5f6d6ed 2349d76 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
### STREAMLIT APP ####
import streamlit as st
from transformers import BertTokenizer, EncoderDecoderModel, EncoderDecoderConfig
model_ckpt = 'ardavey/bert2gpt-indosum'


@st.cache_resource
def _load_summarizer(checkpoint):
    """Load the tokenizer, config and encoder-decoder model for ``checkpoint``.

    Streamlit re-executes this script on every user interaction; caching the
    loaded objects as a resource avoids re-reading the checkpoint each time.

    Args:
        checkpoint: Hub id or local path passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, config, model)`` tuple.

    Raises:
        Exception: whatever ``from_pretrained`` raises when the checkpoint
            cannot be loaded; the caller reports it in the UI.
    """
    loaded_tokenizer = BertTokenizer.from_pretrained(checkpoint)
    # Reuse BERT's [CLS]/[SEP] tokens as the sequence start/end markers.
    loaded_tokenizer.bos_token = loaded_tokenizer.cls_token
    loaded_tokenizer.eos_token = loaded_tokenizer.sep_token

    loaded_config = EncoderDecoderConfig.from_pretrained(checkpoint)
    loaded_config.early_stopping = True

    loaded_model = EncoderDecoderModel.from_pretrained(
        checkpoint, config=loaded_config
    )
    return loaded_tokenizer, loaded_config, loaded_model


try:
    tokenizer, config, model = _load_summarizer(model_ckpt)
except Exception as e:
    st.error(f"An error occurred while loading the model: {e}")
    # Without the model nothing below can work; halt this script run here
    # instead of failing later with a NameError on `model`.
    st.stop()
def capitalize_sentences(text):
    """Capitalize the first character of ``text`` and of each sentence.

    Sentences are delimited by the literal separator ``". "``; empty
    fragments are left as they are.

    Args:
        text: The summary text to post-process.

    Returns:
        The text with the first character of every ``". "``-separated
        fragment upper-cased.
    """
    return '. '.join(
        sentence[:1].upper() + sentence[1:] for sentence in text.split('. ')
    )


# Case-sensitive substring corrections applied to the decoded summary.
replacement_dict = {
    "optiglain": "OptiGuard",
    "telkom university": "Telkom University",
    "menyerbut": "menyebut",
}

text = st.text_area('Enter an article to summarize:')

# Skip empty and whitespace-only input: there is nothing to summarize and
# beam search over it would only waste time.
if text and text.strip():
    input_ids = tokenizer.encode(
        text,
        return_tensors='pt',
        padding='longest',
        truncation=True,
        max_length=512,
    )
    summary_ids = model.generate(
        input_ids,
        min_length=40,
        max_length=200,
        num_beams=10,
        repetition_penalty=2.0,
        length_penalty=1.0,
        no_repeat_ngram_size=3,
        use_cache=True,
        do_sample=False,
        # NOTE(review): top_k is a sampling option; with do_sample=False it
        # presumably has no effect - confirm before removing.
        top_k=50,
    )
    summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    if not summary_text.strip():
        st.warning("The model couldn't generate a summary.")
    else:
        # Fix known wrong terms first, then restore sentence capitalization.
        for wrong_term, correct_term in replacement_dict.items():
            summary_text = summary_text.replace(wrong_term, correct_term)
        summary_text = capitalize_sentences(summary_text)

        st.subheader("Generated Summary:")
        st.write(summary_text)
|