"""Streamlit OCR app: upload an image, extract its text, search and define a keyword."""

import html
import logging
import os
import re
from urllib.parse import quote

import pytesseract
import requests
import streamlit as st
from PIL import Image

logger = logging.getLogger(__name__)

# Default Windows install location of the Tesseract-OCR engine. It is applied
# only when the file actually exists, so that machines with Tesseract on PATH
# keep using pytesseract's default command instead of a broken path.
_WINDOWS_TESSERACT_PATH = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.isfile(_WINDOWS_TESSERACT_PATH):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_PATH

_DICTIONARY_API_URL = "https://api.dictionaryapi.dev/api/v2/entries/en"
_REQUEST_TIMEOUT_SECONDS = 10
_HIGHLIGHT_STYLE = "background-color: yellow; font-weight: bold;"
_MEANING_NOT_FOUND = "Meaning not found."
_MEANING_UNAVAILABLE = "Could not retrieve meaning. Check your internet connection."


def extract_text_from_image(image):
    """Return the text Tesseract recognises in *image*.

    Args:
        image: Image object accepted by ``pytesseract.image_to_string``
            (the app passes a PIL image).

    Returns:
        The recognised text, using the English and Hindi language models.
    """
    return pytesseract.image_to_string(image, lang='eng+hin')


def _highlight_line(line, pattern):
    """Return *line* as HTML with every match of *pattern* wrapped in a styled span."""
    pieces = []
    cursor = 0
    for match in pattern.finditer(line):
        # Escape the untouched text and the match separately so that only the
        # span markup added here is ever interpreted as HTML.
        pieces.append(html.escape(line[cursor:match.start()]))
        pieces.append(
            f"<span style='{_HIGHLIGHT_STYLE}'>{html.escape(match.group(0))}</span>"
        )
        cursor = match.end()
    pieces.append(html.escape(line[cursor:]))
    return "".join(pieces)


def highlight_text(text, keyword):
    """Return *text* as HTML with each occurrence of *keyword* in yellow and bold.

    Matching is case-insensitive, which mirrors the case-insensitive
    containment check the app performs before calling this function. The text
    is HTML-escaped because the result is rendered with
    ``unsafe_allow_html=True``.

    Args:
        text: Text to search, typically the OCR output.
        keyword: Literal string to highlight (not a regular expression).

    Returns:
        An HTML string in which the original lines are separated by ``<br>``.
    """
    lines = text.splitlines()
    if not keyword:
        # An empty pattern would match at every position; nothing to highlight.
        return "<br>".join(html.escape(line) for line in lines)

    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    return "<br>".join(_highlight_line(line, pattern) for line in lines)


def get_keyword_meaning(keyword):
    """Look up the first definition of *keyword* using the Dictionary API.

    Args:
        keyword: Word to look up.

    Returns:
        The first definition found; the API's own ``message`` when it answers
        with an error object; or a fallback message when the request fails or
        the response has an unexpected shape.
    """
    # Percent-encode the word so spaces, slashes, etc. cannot alter the URL path.
    url = f"{_DICTIONARY_API_URL}/{quote(keyword, safe='')}"
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON.
        logger.exception("Dictionary lookup failed for %r", keyword)
        return _MEANING_UNAVAILABLE

    logger.debug("Response Data: %s", data)

    if isinstance(data, list) and data:
        try:
            definitions = data[0]['meanings'][0]['definitions']
            return definitions[0]['definition'] if definitions else _MEANING_NOT_FOUND
        except (IndexError, KeyError, TypeError):
            return _MEANING_NOT_FOUND

    if isinstance(data, dict) and 'message' in data:
        return data['message']  # Error message supplied by the API

    return _MEANING_UNAVAILABLE


# Streamlit application
st.title("OCR Web Application")

# Step 1: Upload image file (JPEG, PNG)
uploaded_file = st.file_uploader(
    "Upload an image file (JPEG, PNG)", type=["jpg", "jpeg", "png"]
)

if uploaded_file is not None:
    # Step 2: Open and display the uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Step 3: Extract text from the image
    extracted_text = extract_text_from_image(image)
    st.subheader("Extracted Text:")
    # st.text keeps the OCR output's line breaks and spacing as-is
    st.text(extracted_text)

    # Step 4: Search for keywords in the extracted text
    search_keyword = st.text_input("Enter keyword to search in the extracted text:")

    if search_keyword:
        st.subheader("Search Results:")
        if search_keyword.lower() in extracted_text.lower():
            # Rendered as HTML so the highlight span's styling is applied
            highlighted_text = highlight_text(extracted_text, search_keyword)
            st.markdown(highlighted_text, unsafe_allow_html=True)

            # Step 5: Get and display the meaning of the searched keyword
            meaning = get_keyword_meaning(search_keyword.lower())
            st.subheader(f"Meaning of '{search_keyword}':")
            st.write(meaning)
        else:
            # No matches found message
            st.write("No matches found.")