### STREAMLIT APP ####
"""Streamlit front end for the `ardavey/bert2gpt-indosum` summarization model.

The user pastes an article into a text area; the app tokenizes it, runs
beam-search generation with the encoder-decoder model, post-processes the
decoded text (term corrections + sentence capitalization) and displays it.
"""
import streamlit as st
from transformers import BertTokenizer, EncoderDecoderModel, EncoderDecoderConfig

model_ckpt = 'ardavey/bert2gpt-indosum'

# Terms to rewrite in the generated summary before it is displayed.
# Matching is case-sensitive and applied as plain substring replacement.
replacement_dict = {
    "optiglain": "OptiGuard",
    "telkom university": "Telkom University",
    "menyerbut": "menyebut",
}


@st.cache_resource
def load_summarizer(checkpoint):
    """Load the tokenizer and model for *checkpoint*.

    Cached with ``st.cache_resource`` so the load is not repeated on every
    Streamlit rerun (the script is re-executed on each widget interaction).

    Args:
        checkpoint: Hugging Face model id or local path.

    Returns:
        A ``(tokenizer, model)`` tuple.

    Raises:
        Exception: whatever ``from_pretrained`` raises (missing files,
            network failure, incompatible config, ...).
    """
    tokenizer = BertTokenizer.from_pretrained(checkpoint)
    # Reuse BERT's [CLS]/[SEP] as the begin/end-of-sequence tokens.
    tokenizer.bos_token = tokenizer.cls_token
    tokenizer.eos_token = tokenizer.sep_token

    config = EncoderDecoderConfig.from_pretrained(checkpoint)
    config.early_stopping = True

    model = EncoderDecoderModel.from_pretrained(checkpoint, config=config)
    return tokenizer, model


def capitalize_sentences(text):
    """Upper-case the first character of *text* and of each ``'. '``-separated sentence.

    Empty fragments (e.g. produced by consecutive separators) are left as-is.
    """
    sentences = text.split('. ')
    capitalized_sentences = [
        sentence[0].upper() + sentence[1:] if sentence else sentence
        for sentence in sentences
    ]
    return '. '.join(capitalized_sentences)


try:
    tokenizer, model = load_summarizer(model_ckpt)
except Exception as e:
    st.error(f"An error occurred while loading the model: {e}")
    # Without a model nothing below can work; halt this run instead of
    # failing later with a NameError on `model`.
    st.stop()

text = st.text_area('Enter an article to summarize:')

# Skip empty and whitespace-only input so no generation pass is wasted on it.
if text and text.strip():
    input_ids = tokenizer.encode(
        text,
        return_tensors='pt',
        padding='longest',
        truncation=True,
        max_length=512,
    )
    summary_ids = model.generate(
        input_ids,
        min_length=40,
        max_length=200,
        num_beams=10,
        repetition_penalty=2.0,
        length_penalty=1.0,
        no_repeat_ngram_size=3,
        use_cache=True,
        do_sample=False,
        top_k=50,  # only consulted when sampling; kept to preserve the original call
    )
    summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    if not summary_text.strip():
        st.warning("The model couldn't generate a summary.")
    else:
        # Correct known wrong terms first, then capitalize sentence starts.
        for wrong_term, correct_term in replacement_dict.items():
            summary_text = summary_text.replace(wrong_term, correct_term)
        summary_text = capitalize_sentences(summary_text)

        st.subheader("Generated Summary:")
        st.write(summary_text)