DevBM committed on
Commit
61e6e50
·
verified ·
1 Parent(s): 26c6760

Use a function that extracts text from any supported document (doc, docx, text, ppt, pptx, LaTeX, HTML, PDF) instead of only PDF

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -15,7 +15,7 @@ st.set_page_config(
15
  )
16
 
17
 
18
- from text_processing import clean_text, get_pdf_text
19
  from question_generation import generate_questions_async
20
  from visualization import display_word_cloud
21
  from data_export import export_to_csv, export_to_pdf
@@ -68,13 +68,13 @@ def main():
68
  text = None
69
  if input_type == "Text Input":
70
  text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.", help="Enter or paste your text here")
71
- elif input_type == "Upload PDF":
72
- file = st.file_uploader("Upload PDF Files")
73
  if file is not None:
74
  try:
75
- text = get_pdf_text(file)
76
  except Exception as e:
77
- st.error(f"Error reading PDF file: {str(e)}")
78
  text = None
79
  if text:
80
  text = clean_text(text)
 
15
  )
16
 
17
 
18
+ from text_processing import clean_text, get_text_from_document
19
  from question_generation import generate_questions_async
20
  from visualization import display_word_cloud
21
  from data_export import export_to_csv, export_to_pdf
 
68
  text = None
69
  if input_type == "Text Input":
70
  text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.", help="Enter or paste your text here")
71
+ elif input_type == "Upload Document":
72
+ file = st.file_uploader("Upload Document File", type=['pdf', 'docx', 'doc', 'pptx', 'ppt', 'html', 'tex', 'txt'])
73
  if file is not None:
74
  try:
75
+ text = get_text_from_document(file)
76
  except Exception as e:
77
+ st.error(f"Error reading file: {str(e)}")
78
  text = None
79
  if text:
80
  text = clean_text(text)