import base64
import html
import io
import re

import easyocr
import numpy as np
import requests
import streamlit as st
from PIL import Image
from streamlit_lottie import st_lottie
# Set page configuration: browser tab title and wide layout.
st.set_page_config(page_title="OCR & Search App", layout="wide")
# Custom CSS hook.
# NOTE(review): the markup string below contains only a newline, so no styles
# are currently injected -- confirm whether the CSS was lost or removed on
# purpose.
st.markdown("""
""", unsafe_allow_html=True)
# Lottie Animation
def load_lottieurl(url: str):
    """Download a Lottie animation and return its decoded JSON payload.

    Args:
        url: Address of the Lottie JSON document.

    Returns:
        The parsed JSON payload, or ``None`` when the request fails, times
        out, answers with a non-200 status, or the body is not valid JSON.
    """
    try:
        # Without an explicit timeout an unreachable host would block the
        # whole script; the animation is decorative, so give up quickly.
        r = requests.get(url, timeout=10)
    except requests.RequestException:
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        # Raised when the response body cannot be decoded as JSON.
        return None
# Placeholder animation for the upload column. The download happens at module
# level, i.e. every time this script runs. lottie_json may be None, because
# load_lottieurl returns None for a non-200 response.
lottie_url = "https://assets5.lottiefiles.com/packages/lf20_fcfjwiyb.json"
lottie_json = load_lottieurl(lottie_url)
# Initialize the OCR reader
@st.cache_resource
def load_ocr_reader():
    """Create the EasyOCR reader configured for English and Hindi.

    Decorated with ``st.cache_resource``, presumably so the reader is built
    once and reused instead of being re-created on every script run.

    Returns:
        The ``easyocr.Reader`` instance.
    """
    languages = ['en', 'hi']  # English and Hindi
    ocr_reader = easyocr.Reader(languages)
    return ocr_reader
# Module-level OCR reader; process_image() calls reader.readtext() on it.
reader = load_ocr_reader()
def process_image(image):
    """Run OCR on an image and return the recognised text.

    Args:
        image: A ``PIL.Image.Image``, or any object ``numpy.array`` can turn
            into a 2-D (grayscale) or 3-D (height, width, channels) array.

    Returns:
        The text of every OCR result joined with newlines. On any failure a
        string starting with ``"Error processing image: "`` is returned
        instead of raising, so callers can display it directly.
    """
    try:
        if isinstance(image, Image.Image):
            # Let Pillow normalise every mode to 3-channel RGB. Looking only
            # at the array shape mishandles palette ("P") images, whose 2-D
            # array holds palette indices rather than intensities, as well as
            # "LA" (2 channels), "1" (boolean) and "CMYK" (4 channels that
            # are not RGBA).
            image = image.convert("RGB")
        img_array = np.array(image)
        # Fallback normalisation for inputs that were not PIL images.
        if img_array.ndim == 2:  # Grayscale: replicate into 3 channels
            img_array = np.stack((img_array,) * 3, axis=-1)
        elif img_array.ndim == 3 and img_array.shape[2] == 4:  # RGBA: drop alpha
            img_array = img_array[:, :, :3]
        results = reader.readtext(img_array)
        # The second element of each result is used as the recognised text.
        return '\n'.join(result[1] for result in results)
    except Exception as e:
        # UI boundary: report the failure as text rather than crash the app.
        return f"Error processing image: {e}"
def search_in_text(extracted_text, keyword):
    """Return the lines of ``extracted_text`` that contain ``keyword``.

    The search is case-insensitive and treats ``keyword`` as literal text, not
    as a regular expression. The result is an HTML fragment meant to be
    rendered with ``unsafe_allow_html=True``: every occurrence of the keyword
    is wrapped in ``<mark>`` tags, matching lines are separated by ``<br>``,
    and all text taken from the input is HTML-escaped so OCR output cannot
    inject markup.

    Args:
        extracted_text: Newline-separated text to search.
        keyword: Text to look for.

    Returns:
        The highlighted HTML fragment, ``"No keyword provided."`` for an
        empty keyword, ``"Keyword not found."`` when no line matches, or a
        string starting with ``"Error searching text: "`` if the search
        fails. Never raises.
    """
    if not keyword:
        return "No keyword provided."
    try:
        # Compile once; the pattern does not change between lines.
        pattern = re.compile(re.escape(keyword), re.IGNORECASE)
        highlighted_lines = []
        for line in extracted_text.split('\n'):
            pieces = []
            cursor = 0
            for match in pattern.finditer(line):
                # Escape the text between matches and the match separately so
                # the <mark> tags are the only markup in the output.
                pieces.append(html.escape(line[cursor:match.start()]))
                pieces.append(f"<mark>{html.escape(match.group())}</mark>")
                cursor = match.end()
            if not pieces:
                continue  # keyword does not occur on this line
            pieces.append(html.escape(line[cursor:]))
            highlighted_lines.append("".join(pieces))
    except Exception as e:
        # UI boundary: report the failure as text rather than crash the app.
        return f"Error searching text: {str(e)}"
    if highlighted_lines:
        return "<br>".join(highlighted_lines)
    return "Keyword not found."
# Streamlit app: page heading and a one-line usage hint.
st.title("📷 OCR and Keyword Search Application")
st.write("Upload an image containing Hindi or English text, extract the content, and search for keywords.")
# Create three columns with equal relative weights: col1 holds the upload
# controls, col2 the extracted text, col3 the keyword search.
col1, col2, col3 = st.columns([1, 1, 1])
# Upload column: file picker, OCR trigger and image preview.
with col1:
    st.header("📤 Upload Image")
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
    if uploaded_file is not None:
        # Open the upload once; the same image feeds both OCR and the preview.
        image = Image.open(uploaded_file)
        if st.button('🔍 Extract Text', key='extract'):
            with st.spinner('Extracting text...'):
                extracted_text = process_image(image)
            # process_image reports failures as text with this prefix instead
            # of raising. Surface them as errors rather than storing them as
            # OCR output and announcing success.
            if extracted_text.startswith("Error processing image:"):
                st.error(extracted_text)
            else:
                st.session_state['extracted_text'] = extracted_text
                st.success('Text extracted successfully!')
        st.image(image, caption='Uploaded Image', use_column_width=True)
    elif lottie_json is not None:
        # The animation is optional: lottie_json is None when it could not be
        # downloaded, in which case the placeholder is simply omitted.
        st_lottie(lottie_json, key="lottie", height=300)
# Text column: shows the OCR result and offers it as a download.
with col2:
    st.header("📝 Extracted Text")
    if 'extracted_text' in st.session_state:
        current_text = st.session_state['extracted_text']
        # Widgets should not have an empty label; give the text area a real
        # one and hide it, since the column header already names the content.
        st.text_area(
            "Extracted text",
            current_text,
            height=300,
            label_visibility="collapsed",
        )
        # Download button
        st.download_button(
            label="📥 Download Extracted Text",
            data=current_text.encode('utf-8'),
            file_name="extracted_text.txt",
            mime="text/plain",
        )
    else:
        st.info("Upload an image and extract text to see the results here.")
# Search column: keyword lookup plus simple statistics on the OCR text.
with col3:
    st.header("🔎 Keyword Search")
    if 'extracted_text' not in st.session_state:
        st.info("Extract text from an image to use the search functionality.")
    else:
        keyword = st.text_input("Enter keyword to search")
        if keyword:
            search_result = search_in_text(st.session_state['extracted_text'], keyword)
            # The search result is rendered as HTML.
            st.markdown(search_result, unsafe_allow_html=True)

        # Word count: number of whitespace-separated tokens.
        words = st.session_state['extracted_text'].split()
        word_count = len(words)
        st.metric(label="Word Count", value=word_count)

        # Language detection
        def detect_language(text):
            """Classify ``text`` as Hindi if it has any Devanagari character.

            Returns ``"Hindi (and possibly English)"`` when at least one
            character in the range U+0900-U+097F occurs, else ``"English"``.
            """
            has_devanagari = re.search(r'[\u0900-\u097F]', text) is not None
            return "Hindi (and possibly English)" if has_devanagari else "English"

        language = detect_language(st.session_state['extracted_text'])
        st.info(f"Detected Language: {language}")
# Add a footer
st.markdown("""