Spaces:

PRIYANSHUDHAKED
/

Data_Extraction_OCR

Sleeping

File size: 2,056 Bytes

ec4e14b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce990a6
ec4e14b
 
ce990a6
ec4e14b
 
ce990a6
dbf63c3
 
 
ce990a6
dbf63c3

# app.py  
import html
import re

import cv2
import numpy as np
import pytesseract
import streamlit as st
from PIL import Image

# Number of characters of surrounding text shown on each side of a match.
CONTEXT_CHARS = 50


def _highlight_matches(text, keyword, context_chars=CONTEXT_CHARS):
    """Return one HTML snippet per case-insensitive occurrence of *keyword*.

    Each snippet holds up to ``context_chars`` characters of text on either
    side of the match, with the match itself wrapped in a yellow-background
    ``<span>``. All text taken from ``text`` is HTML-escaped, so the snippets
    can be rendered with ``unsafe_allow_html=True`` without the OCR output
    being interpreted as markup.

    Args:
        text: The text to search (here, the OCR output).
        keyword: The literal string to look for; it is regex-escaped.
        context_chars: How many characters of context to keep per side.

    Returns:
        A list of HTML strings, empty when the keyword does not occur.
    """
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    snippets = []
    for match in pattern.finditer(text):
        start, end = match.span()
        before = text[max(0, start - context_chars):start]
        # Slicing past the end of a string is safe, so no explicit clamp.
        after = text[end:end + context_chars]
        snippets.append(
            html.escape(before)
            + "<span style='background-color: yellow;'>"
            + html.escape(match.group())
            + "</span>"
            + html.escape(after)
        )
    return snippets


# Set the title of the webpage
st.title("OCR Text Extraction Tool")

# Uploading an image
uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    try:
        # Decoding lives inside the try so a corrupt or unsupported upload is
        # reported through st.error instead of crashing the script.
        image = Image.open(uploaded_file)

        # Display the image
        st.image(image, caption='Uploaded Image', use_column_width=True)

        # Perform OCR on the PIL image directly. pytesseract accepts PIL
        # images, so the previous RGB->BGR round-trip through OpenCV is not
        # needed; that conversion raised on single-channel uploads (e.g.
        # grayscale scans) and handed channel-swapped data to the OCR call.
        text = pytesseract.image_to_string(image)
    except Exception as e:
        # Top-level UI boundary: show the failure to the user.
        st.error(f"An error occurred while processing the image: {str(e)}")
    else:
        st.subheader("Extracted Text:")
        st.write(text)

        # Search functionality
        search_keyword = st.text_input("Enter a keyword to search in the extracted text:")
        if search_keyword:
            snippets = _highlight_matches(text, search_keyword)

            if snippets:
                st.markdown("### Keyword Found:")
                for snippet in snippets:
                    # unsafe_allow_html is required for the <span> highlight
                    # to render; the snippet text was escaped by the helper.
                    st.markdown(f"...{snippet}...", unsafe_allow_html=True)
            else:
                st.write(f"Keyword '{search_keyword}' not found in the extracted text.")