"""Streamlit app: extract Hindi/English text from an image and search it.

The script uploads an image, runs EasyOCR on it, shows the extracted text
(with a download button), and lets the user search the text for a keyword.
"""

import base64
import html
import io
import re

import easyocr
import numpy as np
import requests
import streamlit as st
from PIL import Image
from streamlit_lottie import st_lottie

# Set page configuration (must stay the first Streamlit command in the script).
st.set_page_config(page_title="OCR & Search App", layout="wide")

# Custom CSS
# NOTE(review): the stylesheet payload is empty in this copy of the file;
# the call is kept unchanged so a stylesheet can be dropped back in.
st.markdown(""" """, unsafe_allow_html=True)


# Lottie Animation
def load_lottieurl(url: str):
    """Fetch a Lottie animation as parsed JSON.

    Args:
        url: Address of the Lottie JSON document.

    Returns:
        The decoded JSON object, or ``None`` when the request fails, the
        server does not answer with HTTP 200, or the body is not valid JSON.
    """
    try:
        # A timeout keeps an unreachable host from hanging the whole page.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        # Body was not JSON; treat it like any other failed load.
        return None


lottie_url = "https://assets5.lottiefiles.com/packages/lf20_fcfjwiyb.json"
lottie_json = load_lottieurl(lottie_url)


# Initialize the OCR reader
@st.cache_resource
def load_ocr_reader():
    """Build the EasyOCR reader once and share it across reruns."""
    return easyocr.Reader(['en', 'hi'])  # For English and Hindi


reader = load_ocr_reader()


def _extract_text(image) -> str:
    """Run OCR on ``image`` and return the recognised lines joined by newlines.

    Unlike ``process_image`` this helper lets exceptions propagate, so the
    caller can tell a failure apart from real OCR output.
    """
    if isinstance(image, Image.Image):
        # Normalise palette, grayscale, alpha and CMYK images to plain RGB so
        # the array holds real pixel values rather than palette indices.
        image = image.convert("RGB")
    img_array = np.array(image)
    # The shape fix-ups below still apply to non-PIL (array-like) inputs.
    if img_array.ndim == 2:  # Grayscale
        img_array = np.stack((img_array,) * 3, axis=-1)
    elif img_array.shape[2] == 4:  # RGBA
        img_array = img_array[:, :, :3]
    results = reader.readtext(img_array)
    # Each result's element at index 1 is used as the recognised text.
    return '\n'.join(result[1] for result in results)


def process_image(image) -> str:
    """Extract text from ``image``, reporting failures as a message string.

    Args:
        image: A PIL image (or array-like of pixels) to run OCR on.

    Returns:
        The extracted text, or ``"Error processing image: ..."`` if anything
        went wrong. Kept for callers that rely on this string-only contract;
        the UI below uses ``_extract_text`` so it can show errors as errors.
    """
    try:
        return _extract_text(image)
    except Exception as e:
        return f"Error processing image: {str(e)}"


def _highlight_matches(line: str, pattern) -> str:
    """Return ``line`` HTML-escaped with every ``pattern`` match in ``<mark>``."""
    pieces = []
    cursor = 0
    for match in pattern.finditer(line):
        # Escape the text between matches and the match itself separately so
        # the <mark> tags are the only markup that reaches the page.
        pieces.append(html.escape(line[cursor:match.start()]))
        pieces.append(f"<mark>{html.escape(match.group())}</mark>")
        cursor = match.end()
    pieces.append(html.escape(line[cursor:]))
    return "".join(pieces)


def search_in_text(extracted_text, keyword):
    """Find the lines of ``extracted_text`` that contain ``keyword``.

    The search is case-insensitive and treats ``keyword`` literally (no regex
    syntax). The result is an HTML fragment meant for
    ``st.markdown(..., unsafe_allow_html=True)``: OCR text is escaped, matches
    are wrapped in ``<mark>`` and matching lines are separated by ``<br>``.

    Args:
        extracted_text: Newline-separated text to search.
        keyword: Literal text to look for.

    Returns:
        The highlighted matching lines, ``"No keyword provided."`` for an
        empty keyword, ``"Keyword not found."`` when nothing matches, or
        ``"Error searching text: ..."`` if the search itself fails.
    """
    if not keyword:
        return "No keyword provided."
    try:
        # Compiled once instead of once per matching line.
        pattern = re.compile(re.escape(keyword), re.IGNORECASE)
        highlighted_lines = [
            _highlight_matches(line, pattern)
            for line in extracted_text.split('\n')
            if pattern.search(line)
        ]
    except Exception as e:
        return f"Error searching text: {str(e)}"
    if highlighted_lines:
        return "<br>".join(highlighted_lines)
    return "Keyword not found."


# Language detection
def detect_language(text: str) -> str:
    """Label ``text`` as Hindi if it has any character in U+0900-U+097F."""
    hindi_pattern = re.compile(r'[\u0900-\u097F]')
    if hindi_pattern.search(text):
        return "Hindi (and possibly English)"
    return "English"


# Streamlit app
st.title("📷 OCR and Keyword Search Application")
st.write("Upload an image containing Hindi or English text, extract the content, and search for keywords.")

# Create three columns
col1, col2, col3 = st.columns([1, 1, 1])

with col1:
    st.header("📤 Upload Image")
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    if uploaded_file is not None:
        # Open the upload once; an unreadable file is reported, not raised.
        try:
            image = Image.open(uploaded_file)
        except OSError as e:
            image = None
            st.error(f"Could not open image: {e}")

        if image is not None:
            if st.button('🔍 Extract Text', key='extract'):
                with st.spinner('Extracting text...'):
                    try:
                        extracted_text = _extract_text(image)
                    except Exception as e:
                        # Top-level UI boundary: show the failure instead of
                        # storing it as if it were OCR output.
                        extracted_text = None
                        st.error(f"Error processing image: {str(e)}")
                if extracted_text is not None:
                    st.session_state['extracted_text'] = extracted_text
                    st.success('Text extracted successfully!')

            st.image(image, caption='Uploaded Image', use_column_width=True)
    elif lottie_json is not None:
        # Placeholder animation; skipped when it could not be downloaded.
        st_lottie(lottie_json, key="lottie", height=300)

with col2:
    st.header("📝 Extracted Text")
    if 'extracted_text' in st.session_state:
        # Widgets need a non-empty label; it is hidden to keep the layout.
        st.text_area(
            "Extracted text",
            st.session_state['extracted_text'],
            height=300,
            label_visibility="collapsed",
        )

        # Download button
        st.download_button(
            label="📥 Download Extracted Text",
            data=st.session_state['extracted_text'].encode('utf-8'),
            file_name="extracted_text.txt",
            mime="text/plain",
        )
    else:
        st.info("Upload an image and extract text to see the results here.")

with col3:
    st.header("🔎 Keyword Search")
    if 'extracted_text' in st.session_state:
        keyword = st.text_input("Enter keyword to search")
        if keyword:
            search_result = search_in_text(st.session_state['extracted_text'], keyword)
            st.markdown(search_result, unsafe_allow_html=True)

        # Word count
        word_count = len(st.session_state['extracted_text'].split())
        st.metric(label="Word Count", value=word_count)

        language = detect_language(st.session_state['extracted_text'])
        st.info(f"Detected Language: {language}")
    else:
        st.info("Extract text from an image to use the search functionality.")

# Add a footer
# NOTE(review): the footer markup is empty in this copy of the file.
st.markdown(""" """, unsafe_allow_html=True)