# Spaces:
# Sleeping
# Sleeping
# app.py
import html
import re

import cv2
import numpy as np
import pytesseract
import streamlit as st
from PIL import Image
# Number of characters of surrounding text shown on each side of a match.
_CONTEXT_CHARS = 50


def _highlight_match(text: str, start: int, end: int,
                     margin: int = _CONTEXT_CHARS) -> str:
    """Return an HTML snippet showing ``text[start:end]`` highlighted in context.

    Up to ``margin`` characters on either side of the match are included.
    Every piece of ``text`` is HTML-escaped before being interpolated, because
    the snippet is rendered with ``unsafe_allow_html=True`` and the text comes
    from OCR of a user-supplied image.
    """
    context_start = max(0, start - margin)
    context_end = min(len(text), end + margin)
    before = html.escape(text[context_start:start])
    hit = html.escape(text[start:end])
    after = html.escape(text[end:context_end])
    return (
        f"{before}"
        f"<span style='background-color: yellow;'>{hit}</span>"
        f"{after}"
    )


# Set the title of the webpage
st.title("OCR Text Extraction Tool")

# Uploading an image
uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Decoding, display and OCR all live inside the try so that a corrupt or
    # unsupported upload is reported through st.error instead of crashing.
    try:
        # Convert the uploaded file content to an image
        image = Image.open(uploaded_file)

        # Display the image
        st.image(image, caption='Uploaded Image', use_column_width=True)

        # Normalise to 3-channel RGB so grayscale, palette and RGBA uploads are
        # all handled. The image is passed to pytesseract as RGB (not BGR):
        # pytesseract assumes RGB channel order, so an RGB->BGR conversion
        # would feed it swapped channels.
        ocr_image = image.convert("RGB")

        # Perform OCR
        text = pytesseract.image_to_string(ocr_image)
        st.subheader("Extracted Text:")
        st.write(text)

        # Search functionality
        search_keyword = st.text_input("Enter a keyword to search in the extracted text:")
        if search_keyword:
            # re.escape makes the keyword match literally, case-insensitively.
            pattern = re.compile(re.escape(search_keyword), re.IGNORECASE)
            matches = list(pattern.finditer(text))
            if matches:
                st.markdown("### Keyword Found:")
                for match in matches:
                    start, end = match.span()
                    highlighted_text = _highlight_match(text, start, end)
                    # unsafe_allow_html is required for the <span> highlight to
                    # render; the snippet's text has already been escaped.
                    st.markdown(f"...{highlighted_text}...", unsafe_allow_html=True)
            else:
                st.write(f"Keyword '{search_keyword}' not found in the extracted text.")
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user.
        st.error(f"An error occurred while processing the image: {e}")