PRIYANSHUDHAKED's picture
Update app.py
dbf63c3 verified
raw
history blame
2.06 kB
# app.py
import html
import re

import cv2
import numpy as np
import pytesseract
import streamlit as st
from PIL import Image
# Streamlit OCR tool: the user uploads an image, Tesseract extracts its text,
# and an optional keyword search shows every hit with surrounding context.

# Characters of surrounding text shown on each side of a keyword hit.
_CONTEXT_CHARS = 50


def _highlight_snippet(text: str, start: int, end: int,
                       context_chars: int = _CONTEXT_CHARS) -> str:
    """Return an HTML snippet with ``text[start:end]`` highlighted in context.

    Up to ``context_chars`` characters on either side of the hit are kept.
    Every piece taken from ``text`` is HTML-escaped, because the snippet is
    rendered with ``unsafe_allow_html=True`` and ``text`` comes from OCR of
    an arbitrary uploaded image.
    """
    context_start = max(0, start - context_chars)
    context_end = min(len(text), end + context_chars)
    before = html.escape(text[context_start:start])
    hit = html.escape(text[start:end])
    after = html.escape(text[end:context_end])
    return (
        f"{before}"
        f"<span style='background-color: yellow;'>{hit}</span>"
        f"{after}"
    )


# Set the title of the webpage
st.title("OCR Text Extraction Tool")

# Uploading an image
uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    try:
        # Decoding lives inside the try so a corrupt or unsupported upload is
        # reported through st.error instead of a raw traceback.
        image = Image.open(uploaded_file)

        # Display the image
        st.image(image, caption='Uploaded Image', use_column_width=True)

        # Perform OCR on the PIL image directly. The previous RGB->BGR
        # conversion through OpenCV raised on single-channel (grayscale or
        # palette) uploads and handed pytesseract swapped colour channels.
        text = pytesseract.image_to_string(image)
    except Exception as e:
        # Top-level UI boundary: surface any decoding/OCR failure to the user.
        st.error(f"An error occurred while processing the image: {str(e)}")
    else:
        st.subheader("Extracted Text:")
        st.write(text)

        # Search functionality
        search_keyword = st.text_input("Enter a keyword to search in the extracted text:")
        if search_keyword:
            # re.escape makes the keyword match literally, not as a regex.
            pattern = re.compile(re.escape(search_keyword), re.IGNORECASE)
            matches = list(pattern.finditer(text))
            if matches:
                st.markdown("### Keyword Found:")
                for match in matches:
                    snippet = _highlight_snippet(text, *match.span())
                    # unsafe_allow_html is needed for the <span> highlight to
                    # render; the snippet's text parts are already escaped.
                    st.markdown(f"...{snippet}...", unsafe_allow_html=True)
            else:
                st.write(f"Keyword '{search_keyword}' not found in the extracted text.")