File size: 2,104 Bytes
7d3db52
 
245632e
7d3db52
 
 
 
 
 
 
 
 
245632e
 
 
 
7d3db52
245632e
7d3db52
245632e
 
 
 
 
 
 
 
 
7d3db52
245632e
 
7d3db52
 
 
 
 
 
 
245632e
 
 
 
 
 
 
7d3db52
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import streamlit as st
import requests
from PIL import Image

# Page heading followed by a short description of what the app does.
st.title("OCR Extraction Client")
intro_text = """
    This app lets you upload a PDF or image file. The file is sent to a FastAPI endpoint for OCR extraction,
    and then the extracted text is returned as a Markdown file.
    """
st.write(intro_text)

# Sidebar: the file picker plus the button that triggers processing.
# `uploaded_file` and `process_button` are read by the processing step below.
sidebar = st.sidebar
sidebar.header("Upload Document")
accepted_types = ["pdf", "png", "jpg", "jpeg", "webp"]
uploaded_file = sidebar.file_uploader("Upload a PDF or image file", type=accepted_types)
process_button = sidebar.button("Process Document")

# Processing flow: runs only on the script rerun triggered by clicking
# "Process Document" while a file is selected. The file is previewed (images
# only), posted to the OCR endpoint, and the returned Markdown is rendered and
# offered as a download.
if uploaded_file is not None and process_button:
    st.info(f"Processing file: **{uploaded_file.name}**")

    # Decide once whether the upload is an image; the result drives both the
    # preview and the heading shown above the extracted text.
    is_image = uploaded_file.name.lower().endswith(('.png', '.jpg', '.jpeg', '.webp'))

    if is_image:
        # Preview is best-effort: a failure here must not block the OCR request.
        try:
            image = Image.open(uploaded_file)
            # NOTE(review): newer Streamlit releases deprecate `use_column_width`
            # in favour of `use_container_width` — confirm against the pinned version.
            st.image(image, caption="Uploaded Image", use_column_width=True)
        except Exception as e:
            st.error(f"Error displaying image: {e}")

    api_url = "https://hammad712-urdu-ocr-app.hf.space/upload"
    response = None
    with st.spinner("Sending file to OCR service..."):
        # getvalue() returns the full upload regardless of how far the preview
        # step advanced the file pointer.
        files = {"file": (uploaded_file.name, uploaded_file.getvalue(), uploaded_file.type)}
        try:
            # (connect, read) timeouts so an unreachable or stalled service cannot
            # hang the app forever. The read timeout is generous because OCR of a
            # large document can be slow — tune to the service's real latency.
            response = requests.post(api_url, files=files, timeout=(10, 600))
        except requests.RequestException as e:
            # Connection errors, timeouts, etc. are reported in the UI instead of
            # surfacing as an unhandled traceback.
            st.error(f"Error contacting OCR service: {e}")

    if response is not None:
        if response.status_code == 200:
            st.success("OCR extraction complete!")
            # errors="replace" keeps a malformed byte in the response from
            # crashing the app; valid UTF-8 decodes exactly as before.
            md_content = response.content.decode("utf-8", errors="replace")

            # Only the heading depends on the file type; the body is rendered
            # the same way for images and PDFs.
            if is_image:
                st.markdown("### Extracted Text from Image")
            else:
                st.markdown("### Extracted Markdown Text")
            st.markdown(md_content)

            st.download_button(
                label="Download Markdown File",
                data=md_content,
                file_name="output.md",
                mime="text/markdown"
            )
        else:
            st.error(f"Error: {response.status_code} {response.text}")
elif process_button:
    # Button clicked without a file selected: tell the user instead of doing
    # nothing silently.
    st.warning("Please upload a PDF or image file before processing.")