### STREAMLIT APP ####
import streamlit as st
from transformers import BertTokenizer, EncoderDecoderConfig, EncoderDecoderModel
# BERT encoder + GPT-2 decoder checkpoint fine-tuned for Indonesian
# summarization (IndoSum dataset).
model_ckpt = 'ardavey/bert2gpt-indosum'


@st.cache_resource
def _load_tokenizer_and_model(ckpt):
    """Load and cache the tokenizer and encoder-decoder model.

    Cached with st.cache_resource so the (large) model is downloaded and
    instantiated once per process instead of on every Streamlit rerun.

    Returns:
        (BertTokenizer, EncoderDecoderModel) tuple for `ckpt`.
    """
    tok = BertTokenizer.from_pretrained(ckpt)
    # BERT has no dedicated BOS/EOS tokens; reuse [CLS]/[SEP] so that
    # generate() sees the start/end markers it expects.
    tok.bos_token = tok.cls_token
    tok.eos_token = tok.sep_token

    cfg = EncoderDecoderConfig.from_pretrained(ckpt)
    cfg.early_stopping = True  # stop beam search once all beams emit EOS
    mdl = EncoderDecoderModel.from_pretrained(ckpt, config=cfg)
    return tok, mdl


try:
    tokenizer, model = _load_tokenizer_and_model(model_ckpt)
except Exception as e:
    st.error(f"An error occurred while loading the model: {e}")
    # Halt the script here: without st.stop() execution would continue and
    # crash later with a NameError when `model`/`tokenizer` are used.
    st.stop()
text = st.text_area('Enter an article to summarize:')


def _capitalize_sentences(s):
    """Capitalize the first letter of each '. '-separated sentence of *s*."""
    sentences = s.split('. ')
    return '. '.join(
        sentence[0].upper() + sentence[1:] if sentence else sentence
        for sentence in sentences
    )


# Known model mis-generations mapped to their corrected spellings.
# Hoisted to module level so it is not rebuilt inside the branch each rerun.
_REPLACEMENTS = {
    "optiglain": "OptiGuard",
    "telkom university": "Telkom University",
    "menyerbut": "menyebut",
}

# st.text_area returns '' when empty, so a plain truthiness check suffices
# (the original `text and len(text) > 0` double-checked the same thing).
if text:
    # Single-sequence encode: `padding` is unnecessary (no batch to pad to);
    # truncate to BERT's 512-token positional limit.
    input_ids = tokenizer.encode(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    )
    # Deterministic beam search. Note: `top_k` (present in the original call)
    # is only meaningful when do_sample=True, so it was dead and is dropped.
    summary_ids = model.generate(
        input_ids,
        min_length=40,
        max_length=200,
        num_beams=10,
        repetition_penalty=2.0,
        length_penalty=1.0,
        no_repeat_ngram_size=3,
        use_cache=True,
        do_sample=False,
    )
    summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    if not summary_text.strip():
        # Model produced only whitespace/special tokens.
        st.warning("The model couldn't generate a summary.")
    else:
        # Apply term corrections first, then sentence capitalization —
        # same order as the original so output is byte-identical.
        for wrong_term, correct_term in _REPLACEMENTS.items():
            summary_text = summary_text.replace(wrong_term, correct_term)
        summary_text = _capitalize_sentences(summary_text)
        st.subheader("Generated Summary:")
        st.write(summary_text)