csv_corrector / app.py
danielRamon's picture
Create app.py
6ebb80b verified
raw
history blame
2.93 kB
from transformers import pipeline
from tqdm import tqdm
import pandas as pd
import streamlit as st
from io import StringIO
def correct_text(uploaded_file: pd.DataFrame, column_to_correct: int) -> pd.DataFrame:
    """
    Run the text-correction model over one column of a DataFrame.

    Every non-null cell of the selected column is converted to ``str``,
    prefixed with ``"<es>"`` and passed to the
    ``sdadas/byt5-text-correction`` text2text-generation pipeline. When the
    model output differs from the input text, the output is stored in the
    column labelled ``column_to_correct + 1``; rows that are null or that
    the model leaves unchanged get no value written there.

    A Streamlit progress bar and status line are refreshed after each row.

    Args:
        uploaded_file: DataFrame containing the text to correct. It is
            modified in place.
        column_to_correct: Zero-based position of the column to correct.

    Returns:
        The same DataFrame object that was passed in, with the corrected
        text written to it.
    """
    # NOTE: the pipeline is constructed on every call.
    corrector = pipeline("text2text-generation",
                         model="sdadas/byt5-text-correction")
    df = uploaded_file
    total_rows = len(df)

    # The destination is addressed by *label* through ``df.loc``: the integer
    # ``column_to_correct + 1`` is used as the column name, and pandas creates
    # that column on the first write if no column carries that label yet.
    target_label = column_to_correct + 1

    progress_bar = st.progress(0)
    status_text = st.text("Correcting text 🧠...")

    # Iterate a snapshot of the source column instead of ``df.iterrows()``:
    # the frame gains a column inside the loop, and iterating the column
    # keeps each cell's own dtype rather than the row-wise upcast value.
    source_cells = df.iloc[:, column_to_correct]

    # Count rows with ``enumerate`` so progress does not depend on the index
    # labels being 0..n-1 (a filtered or non-numeric index would otherwise
    # push the fraction outside [0, 1] or fail outright).
    for rows_done, (row_label, cell) in enumerate(source_cells.items(), start=1):
        if pd.notna(cell):
            original_text = str(cell)
            # "<es>" is prepended to every input; presumably the model's
            # Spanish language tag - confirm against the model card.
            corrected_text = corrector(
                "<es>" + original_text, max_length=1024)[0]['generated_text']

            # Save corrected text only if different from original
            if corrected_text != original_text:
                df.loc[row_label, target_label] = corrected_text

        progress = rows_done / total_rows
        progress_bar.progress(progress)
        status_text.text(f"Progress: {int(progress * 100)}% completed ")

    return df
def choose_columns(dataframe):
    """
    Let the user pick the column to correct and show a preview of its data.

    Renders a select box listing the DataFrame's columns, a preview of the
    first non-null values of the chosen column, and a "Correct Text" button.

    Args:
        dataframe: Input DataFrame

    Returns:
        Positional index of the selected column (as given by
        ``dataframe.columns.get_loc``) when the button is pressed with a
        column selected; None otherwise.
    """
    st.write("Choose the columns to correct 🔍")
    column_to_correct = st.selectbox(
        "Select columns to correct", dataframe.columns)

    # Test against None explicitly: a real column label can be falsy
    # (e.g. 0 or ""), and a plain truthiness check would treat such a column
    # as "nothing selected" here while the button branch below accepted it.
    has_selection = column_to_correct is not None

    if has_selection:
        st.write("Preview of data in selected columns 👀:")
        # First few non-null values of the chosen column.
        st.dataframe(dataframe[column_to_correct].dropna().head())

    if st.button("Correct Text"):
        if has_selection:
            return dataframe.columns.get_loc(column_to_correct)
        st.error("Please select a column before correcting text ❌")

    return None
def main():
    """
    Main function to run the text correction application.

    Shows the page title and a CSV uploader. Once a file is uploaded it is
    parsed as UTF-8 CSV, the user chooses a column, and the corrected
    DataFrame is written to the page. Decoding failures and any other
    exception raised along the way are reported with ``st.error`` instead
    of propagating.
    """
    st.title("CSV text Correction App ✔")
    uploaded_file = st.file_uploader("Choose a CSV file 📄", type=["csv"])

    # Nothing to do until the user has provided a file.
    if uploaded_file is None:
        return

    try:
        dataframe = pd.read_csv(uploaded_file, encoding='utf-8')
        column_index = choose_columns(dataframe)
        # choose_columns returns None until the button is pressed with a
        # column selected.
        if column_index is not None:
            st.write(correct_text(dataframe, column_index))
    except UnicodeDecodeError:
        st.error(
            "Error: Unable to decode the file. Please check the file encoding or try another file.")
    except Exception as e:
        # Top-level boundary of the app: surface the failure to the user.
        st.error(f"An unexpected error occurred: {e}")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()