Spaces:
Sleeping
Sleeping
Use a function for extracting text from any document (doc, docx, text, ppt, pptx, latex, html, pdf) instead of just pdf
Browse files
app.py
CHANGED
@@ -15,7 +15,7 @@ st.set_page_config(
|
|
15 |
)
|
16 |
|
17 |
|
18 |
-
from text_processing import clean_text,
|
19 |
from question_generation import generate_questions_async
|
20 |
from visualization import display_word_cloud
|
21 |
from data_export import export_to_csv, export_to_pdf
|
@@ -68,13 +68,13 @@ def main():
|
|
68 |
text = None
|
69 |
if input_type == "Text Input":
|
70 |
text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.", help="Enter or paste your text here")
|
71 |
-
elif input_type == "Upload
|
72 |
-
file = st.file_uploader("Upload
|
73 |
if file is not None:
|
74 |
try:
|
75 |
-
text =
|
76 |
except Exception as e:
|
77 |
-
st.error(f"Error reading
|
78 |
text = None
|
79 |
if text:
|
80 |
text = clean_text(text)
|
|
|
15 |
)
|
16 |
|
17 |
|
18 |
+
from text_processing import clean_text, get_text_from_document
|
19 |
from question_generation import generate_questions_async
|
20 |
from visualization import display_word_cloud
|
21 |
from data_export import export_to_csv, export_to_pdf
|
|
|
68 |
text = None
|
69 |
if input_type == "Text Input":
|
70 |
text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.", help="Enter or paste your text here")
|
71 |
+
elif input_type == "Upload Document":
|
72 |
+
file = st.file_uploader("Upload Document File", type=['pdf', 'docx', 'doc', 'pptx', 'ppt', 'html', 'tex', 'txt'])
|
73 |
if file is not None:
|
74 |
try:
|
75 |
+
text = get_text_from_document(file)
|
76 |
except Exception as e:
|
77 |
+
st.error(f"Error reading file: {str(e)}")
|
78 |
text = None
|
79 |
if text:
|
80 |
text = clean_text(text)
|