Gopal2002 committed on
Commit
7d46793
·
verified ·
1 Parent(s): 3313e1b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -0
app.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit page that runs an uploaded PDF through two OCR parsers.

The uploaded file is written to a scratch directory, handed to
``helper.process_docs``, and the text returned for each parser is rendered
page by page. The scratch directory is removed again whether or not
processing succeeds.
"""
import os
import shutil

import streamlit as st

from helper import process_docs

# Scratch directory the upload is written to so process_docs can read it
# from a filesystem path.
UPLOAD_DIR = "./Tested_Docs"

st.title("Benchmark Parser Performance")

st.markdown("### Upload Document:")

uploaded_file = st.file_uploader("Choose a file", type='.pdf')

if uploaded_file:
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(UPLOAD_DIR, exist_ok=True)

    # The file name is supplied by the client; keep only its final component
    # so it cannot point outside the scratch directory.
    safe_name = os.path.basename(uploaded_file.name)
    doc_path = f"{UPLOAD_DIR}/{safe_name}"

    processed = False
    try:
        with open(doc_path, "wb") as f:
            # getvalue() returns the full buffer regardless of the current
            # stream position, unlike read().
            f.write(uploaded_file.getvalue())

        try:
            with st.spinner("Processing document..."):
                docspaddle, docsdocling = process_docs(doc_path)
            processed = True
        except Exception as e:
            # Top-level UI boundary: surface the failure to the user instead
            # of crashing the page.
            st.warning(e)
    finally:
        # Best-effort cleanup on success and failure alike, so uploads do
        # not accumulate on disk when processing raises.
        shutil.rmtree(UPLOAD_DIR, ignore_errors=True)

    # Only render when processing produced results; otherwise the names
    # below were never bound.
    if processed:
        st.markdown("### Extracted Text by Docling-OCR :")
        # docsdocling is iterated as a mapping of page number -> text.
        for page_number, txt in docsdocling.items():
            st.markdown(f"#### Page {page_number}")
            st.text(txt)

        st.markdown("### Extracted Text by Paddle-OCR :")
        # docspaddle is iterated as a sequence; pages are numbered from 1.
        for page_number, txt in enumerate(docspaddle, start=1):
            st.markdown(f"#### Page {page_number}")
            st.text(txt)