mgokg committed on
Commit
8ccf021
·
verified ·
1 Parent(s): 35d364a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -1
app.py CHANGED
@@ -9,6 +9,25 @@ def process_pdf(file):
9
  for page in pdf_reader.pages:
10
  text += page.extract_text()
11
  return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  with gr.Blocks() as demo:
14
  gr.Markdown("### File upload", elem_classes="tab-header")
@@ -18,6 +37,6 @@ with gr.Blocks() as demo:
18
  file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
19
  with gr.Row():
20
  submit_button = gr.Button("upload")
21
- submit_button.click(process_pdf, inputs=file_input, outputs=text_output)
22
 
23
  demo.launch()
 
9
  for page in pdf_reader.pages:
10
  text += page.extract_text()
11
  return text
12
+
13
def process_file(file):
    """Extract plain text from an uploaded PDF or DOCX file.

    Args:
        file: The upload handed over by the Gradio file component. Either a
            filesystem path (``str`` / ``os.PathLike``), which is what
            ``gr.File(type="filepath")`` delivers, or an object exposing the
            path through a ``name`` attribute. ``None`` means that nothing
            was uploaded.

    Returns:
        The extracted text, or ``None`` when no file was supplied or the
        file extension is neither ``.pdf`` nor ``.docx``.
    """
    import os

    if file is None:
        return None

    # Path-like uploads have no ``.name``; wrapper objects carry the path there.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    # splitext only reports a real suffix, so an extension-less file that is
    # merely called "pdf" is not mistaken for a PDF document.
    extension = os.path.splitext(path)[1].lower()

    if extension == ".pdf":
        # Hand the upload over untouched; process_pdf decides how to open it.
        return process_pdf(file)

    if extension == ".docx":
        document = DocxDocument(path)
        # One line per paragraph, each terminated by a newline.
        return "".join(
            f"{paragraph.text}\n" for paragraph in document.paragraphs
        )

    # Unsupported file type: nothing to extract.
    return None
28
+
29
+
30
+
31
 
32
  with gr.Blocks() as demo:
33
  gr.Markdown("### File upload", elem_classes="tab-header")
 
37
  file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
38
  with gr.Row():
39
  submit_button = gr.Button("upload")
40
+ submit_button.click(process_file, inputs=file_input, outputs=text_output)
41
 
42
  demo.launch()