"""Streamlit app that restores punctuation marks (। , ?) in Bangla text.

The user's text is stripped of any existing punctuation, passed through a
text2text-generation pipeline, and the generated text is displayed with the
punctuation marks highlighted.
"""

import html
import re

import streamlit as st
import torch  # noqa: F401  -- not referenced directly; import retained from the original script
from transformers import pipeline

MODEL_NAME = "samanjoy2/bnpunct_banglat5_seq2seq_finetuned"

# Marks that are removed from the input and highlighted in the output.
PUNCTUATION_MARKS = ("।", ",", "?")

# Markup wrapped around every highlighted mark. The result is rendered with
# ``unsafe_allow_html=True``, so the surrounding text must be HTML-escaped.
HIGHLIGHT_TEMPLATE = '<span style="color: red; font-weight: bold;">{}</span>'

# Translation table that deletes every mark in PUNCTUATION_MARKS in one pass.
_STRIP_PUNCTUATION = str.maketrans("", "", "".join(PUNCTUATION_MARKS))


@st.cache_resource
def load_pipeline():
    """Build the text2text-generation pipeline once and reuse it.

    Streamlit re-executes the whole script on every widget interaction;
    caching the pipeline as a resource avoids reloading the model each time.
    """
    return pipeline("text2text-generation", model=MODEL_NAME, device="cpu")


def highlight_punctuation(text, punctuation_marks):
    """Return *text* as HTML with every punctuation mark highlighted.

    Args:
        text: Plain text to decorate.
        punctuation_marks: Iterable of literal strings to highlight. Empty
            strings are ignored.

    Returns:
        An HTML string in which all of *text* is HTML-escaped and each
        occurrence of a mark is wrapped in ``HIGHLIGHT_TEMPLATE``.
    """
    # An empty alternative would match at every position, so drop empties.
    marks = [mark for mark in punctuation_marks if mark]
    if not marks:
        return html.escape(text, quote=False)

    alternation = "|".join(map(re.escape, marks))
    # Splitting on a single capture group yields alternating pieces:
    # even indexes are ordinary text, odd indexes are the matched marks.
    pieces = re.split(f"({alternation})", text)
    return "".join(
        HIGHLIGHT_TEMPLATE.format(html.escape(piece, quote=False))
        if index % 2
        else html.escape(piece, quote=False)
        for index, piece in enumerate(pieces)
    )


def main():
    """Render the page and run restoration when the button is pressed."""
    pipe = load_pipeline()

    st.title("Bangla Punctuation Restoration 🔨")
    st.header("Input in Bengali text and get corrected output with proper punctuation marks [। , ?]")

    # User input for text generation
    input_text = st.text_area("Enter Bangla text for restoration:", max_chars=400)

    if not st.button("Restore Punctuations"):
        return

    # Remove any punctuation already present so the model restores it all.
    cleaned_text = input_text.translate(_STRIP_PUNCTUATION)

    # Check emptiness after stripping: whitespace-only or punctuation-only
    # input leaves nothing for the model to work on.
    if not cleaned_text.strip():
        st.warning("Please enter text for restoration.")
        return

    # Generate text using the pipeline
    generated_text = pipe(cleaned_text, max_length=512, batch_size=1)[0]["generated_text"]

    # Display the generated text
    st.subheader("Restored Text:")
    st.write(
        highlight_punctuation(generated_text, PUNCTUATION_MARKS),
        unsafe_allow_html=True,
    )


if __name__ == "__main__":
    main()