Spaces:

sitwala
/

test_sentence_splitter

Sleeping

SitwalaM committed on Sep 27, 2024

Commit

7f0f75f

1 Parent(s): 496220c

added file upload and outputs download

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,11 +1,19 @@
 import streamlit as st
 from dolma.core.utils import split_paragraphs, split_sentences
 # Title of the Streamlit app
 st.title('Text Splitter: Paragraphs and Sentences')
-# Text input from user
-sample_text = st.text_area("Paste your text below", height=300)
 if sample_text:
     # Split the text into paragraphs
@@ -35,5 +43,24 @@ if sample_text:
             st.subheader(f"Sentence {i + 1}")
             st.write(sentence.text)
 else:
-    st.write("Please paste your text to split it into paragraphs and sentences.")

 import streamlit as st
+import pandas as pd
 from dolma.core.utils import split_paragraphs, split_sentences
 # Title of the Streamlit app
 st.title('Text Splitter: Paragraphs and Sentences')
+# File uploader for text document
+uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
+if uploaded_file:
+    # Read the uploaded text file
+    sample_text = uploaded_file.read().decode("utf-8")
+else:
+    # Text input from user
+    sample_text = st.text_area("Or paste your text below", height=300)
 if sample_text:
     # Split the text into paragraphs
             st.subheader(f"Sentence {i + 1}")
             st.write(sentence.text)
+    # Convert paragraphs and sentences to pandas DataFrames
+    paragraphs_df = pd.DataFrame([p.text for p in paragraphs], columns=["Paragraph"])
+    sentences_df = pd.DataFrame([s.text for s in sentences], columns=["Sentence"])
+    # Option to download the paragraphs and sentences as CSV files
+    st.download_button(
+        label="Download Paragraphs as CSV",
+        data=paragraphs_df.to_csv(index=False).encode('utf-8'),
+        file_name="paragraphs.csv",
+        mime="text/csv"
+    )
+    st.download_button(
+        label="Download Sentences as CSV",
+        data=sentences_df.to_csv(index=False).encode('utf-8'),
+        file_name="sentences.csv",
+        mime="text/csv"
+    )
 else:
+    st.write("Please upload a text file or paste your text to split it into paragraphs and sentences.")