File size: 1,268 Bytes
7d46793
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2df4c2a
7d46793
 
 
 
 
2df4c2a
 
 
 
 
7d46793
 
 
 
 
 
 
 
2df4c2a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import streamlit as st
import os
import shutil
from helper import process_docs

# Page header: app title followed by the upload section heading.
st.title("Benchmark Parser Performance")
st.markdown("### Upload Document:")

# Single-file upload widget restricted to PDFs; falsy until a file is chosen.
uploaded_file = st.file_uploader(label="Choose a file", type=".pdf")

if uploaded_file:
    # process_docs takes a file path, so the upload is first written to a
    # scratch directory on disk.
    upload_dir = "./Tested_Docs"
    os.makedirs(upload_dir, exist_ok=True)

    # Keep only the final path component of the client-supplied file name so
    # the file cannot be written outside the scratch directory.
    doc_path = f"{upload_dir}/{os.path.basename(uploaded_file.name)}"
    with open(doc_path, "wb") as f:
        # getvalue() returns the whole buffer regardless of the current
        # stream position, unlike read().
        f.write(uploaded_file.getvalue())

    try:
        with st.spinner("Processing document..."):
            docsllama, docspaddle, docsdocling = process_docs(doc_path)
    except Exception as e:
        # Top-level UI boundary: surface the failure to the user instead of
        # crashing the app.
        st.warning(e)
    else:
        # Render results only when parsing succeeded; otherwise the result
        # names below would be unbound.
        st.markdown("### Extracted Text by Llama-Parser :")
        for page_number, txt in enumerate(docsllama):
            st.markdown(f"#### Page {page_number+1}")
            st.text(txt)

        st.markdown("### Extracted Text by Docling-OCR :")
        # docsdocling is iterated as a mapping of page label -> text.
        for page_number, txt in docsdocling.items():
            st.markdown(f"#### Page {page_number}")
            st.text(txt)

        st.markdown("### Extracted Text by Paddle-OCR :")
        for page_number, txt in enumerate(docspaddle):
            st.markdown(f"#### Page {page_number+1}")
            st.text(txt)
    finally:
        # Always remove the scratch directory, including when parsing failed.
        shutil.rmtree(upload_dir, ignore_errors=True)