ocr-app / app.py
Arch10's picture
Update app.py
a041eb6 verified
raw
history blame
1.4 kB
import html
import re

import easyocr
import streamlit as st
from PIL import Image
# Initialize EasyOCR Reader
reader = easyocr.Reader(['en', 'hi'])
# Streamlit app title
st.title("Image Text Extraction and Keyword Search using EasyOCR")
# File uploader for image input
uploaded_image = st.file_uploader("Upload an image", type=['png', 'jpg', 'jpeg'])
if uploaded_image is not None:
# Load the uploaded image
image = Image.open(uploaded_image)
# Display the image
st.image(image, caption='Uploaded Image', use_column_width=True)
# Perform OCR
with st.spinner("Extracting text..."):
results = reader.readtext(image)
# Extract the text
extracted_text = " ".join([text for (_, text, _) in results])
if extracted_text:
st.success("Extracted Text:")
st.write(extracted_text)
# Keyword search feature
keyword = st.text_input("Enter a keyword to search in the extracted text:")
if keyword:
# Highlight matches
highlighted_text = re.sub(f"({keyword})", r"<mark>\1</mark>", extracted_text, flags=re.IGNORECASE)
st.markdown(f"**Search Results for '{keyword}':**", unsafe_allow_html=True)
st.markdown(highlighted_text, unsafe_allow_html=True)
else:
st.info("Enter a keyword to search.")
else:
st.warning("No text detected in the image.")