File size: 1,385 Bytes
3146a45
a36a8d3
3146a45
 
 
 
 
 
4668b3a
 
 
4b69cb8
4668b3a
 
d075973
 
3146a45
 
5695f5f
3146a45
 
 
d075973
 
3146a45
 
4668b3a
3146a45
d075973
4668b3a
3146a45
d075973
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import html
import re

import streamlit as st
import torch
from transformers import pipeline

# Load the text-to-text generation pipeline
pipe = pipeline("text2text-generation", model="samanjoy2/bnpunct_banglat5_seq2seq_finetuned", device='cpu')


def highlight_punctuation(text, punctuation_marks):
    punctuation_pattern = '|'.join(map(re.escape, punctuation_marks))
    highlighted_text = re.sub(f'({punctuation_pattern})', r'<span style="color: green; font-weight: bold;">\1</span>', text)
    return highlighted_text

st.title("Bangla Punctutation Restoration 🔨")
st.header("Input in Bengali text and get corrected output with proper punctuation marks [। , ?]")

# User input for text generation
input_text = st.text_area("Enter Bangla text for restoration:", max_chars=400)

if st.button("Restore Punctuations"):
    if input_text:
        # Remove the Punctuations if there are any
        input_text = input_text.replace('।', '').replace(',', '').replace('?', '')
        # Generate text using the pipeline
        generated_text = pipe(input_text, max_length=512, batch_size=1)[0]['generated_text']
        generated_text = highlight_punctuation(generated_text, ["।", ",", "?"])
        # Display the generated text
        st.subheader("Restored Text:")
        st.write(generated_text, unsafe_allow_html=True)
    else:
        st.warning("Please enter text for restoration.")