Spaces:

Arch10
/

ocr-app

Sleeping

File size: 1,404 Bytes

28fd050
a041eb6
28fd050
a041eb6
28fd050
a041eb6
 
28fd050
a041eb6
 
28fd050
a041eb6
 
28fd050
a041eb6
 
 
28fd050
a041eb6
 
28fd050
 
 
a041eb6
 
 
 
 
 
 
 
 
 
 
28fd050
a041eb6

import html
import re

import easyocr
import streamlit as st
from PIL import Image

# Initialize EasyOCR Reader
reader = easyocr.Reader(['en', 'hi'])

# Streamlit app title
st.title("Image Text Extraction and Keyword Search using EasyOCR")

# File uploader for image input
uploaded_image = st.file_uploader("Upload an image", type=['png', 'jpg', 'jpeg'])

if uploaded_image is not None:
    # Load the uploaded image
    image = Image.open(uploaded_image)
    
    # Display the image
    st.image(image, caption='Uploaded Image', use_column_width=True)
    
    # Perform OCR
    with st.spinner("Extracting text..."):
        results = reader.readtext(image)
    
    # Extract the text
    extracted_text = " ".join([text for (_, text, _) in results])
    
    if extracted_text:
        st.success("Extracted Text:")
        st.write(extracted_text)

        # Keyword search feature
        keyword = st.text_input("Enter a keyword to search in the extracted text:")

        if keyword:
            # Highlight matches
            highlighted_text = re.sub(f"({keyword})", r"<mark>\1</mark>", extracted_text, flags=re.IGNORECASE)
            st.markdown(f"**Search Results for '{keyword}':**", unsafe_allow_html=True)
            st.markdown(highlighted_text, unsafe_allow_html=True)
        else:
            st.info("Enter a keyword to search.")
    else:
        st.warning("No text detected in the image.")