SitwalaM committed on
Commit
7f0f75f
·
1 Parent(s): 496220c

added file upload and outputs download

Browse files
Files changed (1) hide show
  1. app.py +30 -3
app.py CHANGED
@@ -1,11 +1,19 @@
1
  import streamlit as st
 
2
  from dolma.core.utils import split_paragraphs, split_sentences
3
 
4
  # Title of the Streamlit app
5
  st.title('Text Splitter: Paragraphs and Sentences')
6
 
7
- # Text input from user
8
- sample_text = st.text_area("Paste your text below", height=300)
 
 
 
 
 
 
 
9
 
10
  if sample_text:
11
  # Split the text into paragraphs
@@ -35,5 +43,24 @@ if sample_text:
35
  st.subheader(f"Sentence {i + 1}")
36
  st.write(sentence.text)
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  else:
39
- st.write("Please paste your text to split it into paragraphs and sentences.")
 
1
  import streamlit as st
2
+ import pandas as pd
3
  from dolma.core.utils import split_paragraphs, split_sentences
4
 
5
  # Title of the Streamlit app
6
  st.title('Text Splitter: Paragraphs and Sentences')
7
 
8
+ # File uploader for text document
9
+ uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
10
+
11
+ if uploaded_file:
12
+ # Read the uploaded text file
13
+ sample_text = uploaded_file.read().decode("utf-8")
14
+ else:
15
+ # Text input from user
16
+ sample_text = st.text_area("Or paste your text below", height=300)
17
 
18
  if sample_text:
19
  # Split the text into paragraphs
 
43
  st.subheader(f"Sentence {i + 1}")
44
  st.write(sentence.text)
45
 
46
+ # Convert paragraphs and sentences to pandas DataFrames
47
+ paragraphs_df = pd.DataFrame([p.text for p in paragraphs], columns=["Paragraph"])
48
+ sentences_df = pd.DataFrame([s.text for s in sentences], columns=["Sentence"])
49
+
50
+ # Option to download the paragraphs and sentences as CSV files
51
+ st.download_button(
52
+ label="Download Paragraphs as CSV",
53
+ data=paragraphs_df.to_csv(index=False).encode('utf-8'),
54
+ file_name="paragraphs.csv",
55
+ mime="text/csv"
56
+ )
57
+
58
+ st.download_button(
59
+ label="Download Sentences as CSV",
60
+ data=sentences_df.to_csv(index=False).encode('utf-8'),
61
+ file_name="sentences.csv",
62
+ mime="text/csv"
63
+ )
64
+
65
  else:
66
+ st.write("Please upload a text file or paste your text to split it into paragraphs and sentences.")