Spaces:

snehareddy23
/

OCR-Web-Application

Build error

App Files Files Community

snehareddy23 committed on Sep 26, 2024

Commit

0d66a88

verified ·

1 Parent(s): 41493e2

Upload app.py

Browse files

Files changed (1) hide show

app.py +88 -0

app.py ADDED Viewed

	@@ -0,0 +1,88 @@

import html
import os
import re
from urllib.parse import quote

import pytesseract
import requests
import streamlit as st
from PIL import Image
# Point pytesseract at the Tesseract-OCR executable.
# An explicit TESSERACT_CMD environment variable wins. Otherwise the default
# Windows install location is used, but only when that file actually exists.
# On any other system (e.g. a Linux container) pytesseract's own default
# command is left untouched instead of being overwritten with a Windows path
# that cannot exist there.
_WINDOWS_TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
_tesseract_cmd = os.environ.get("TESSERACT_CMD")
if not _tesseract_cmd and os.path.isfile(_WINDOWS_TESSERACT_CMD):
    _tesseract_cmd = _WINDOWS_TESSERACT_CMD
if _tesseract_cmd:
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd
def extract_text_from_image(image):
    """Run Tesseract OCR on *image* and return the recognised text.

    The image is passed straight to ``pytesseract.image_to_string`` with the
    language string ``'eng+hin'``.
    """
    return pytesseract.image_to_string(image, lang='eng+hin')
def highlight_text(text, keyword):
    """Return *text* as HTML with every occurrence of *keyword* highlighted.

    Matching is case-insensitive, so it agrees with a case-insensitive
    "is the keyword present" check, and each hit keeps the casing it has in
    *text*. The keyword is matched literally: regex metacharacters in it have
    no special meaning.

    Both the highlighted hits and the surrounding text are HTML-escaped, so
    markup-like characters coming from OCR output or user input are shown
    verbatim instead of being interpreted when the result is rendered as HTML.

    Args:
        text: Plain text, possibly spanning several lines.
        keyword: Literal string to highlight. An empty keyword highlights
            nothing.

    Returns:
        The lines of *text* joined with ``<br>``, with each hit wrapped in a
        yellow-background, bold ``<span>``.
    """
    span_open = "<span style='background-color: yellow; font-weight: bold;'>"
    span_close = "</span>"

    lines = text.splitlines()

    # An empty pattern would match between every character; just escape.
    if not keyword:
        return "<br>".join(html.escape(line, quote=False) for line in lines)

    # The capturing group makes split() return the hits themselves at the odd
    # indices, with the untouched text in between at the even indices.
    pattern = re.compile(f"({re.escape(keyword)})", re.IGNORECASE)

    highlighted_lines = []
    for line in lines:
        pieces = []
        for index, piece in enumerate(pattern.split(line)):
            escaped = html.escape(piece, quote=False)
            if index % 2:
                pieces.append(f"{span_open}{escaped}{span_close}")
            else:
                pieces.append(escaped)
        highlighted_lines.append("".join(pieces))

    return "<br>".join(highlighted_lines)
def get_keyword_meaning(keyword):
    """Look up an English definition of *keyword* via api.dictionaryapi.dev.

    Args:
        keyword: The word to look up. It is percent-encoded before being
            placed in the request URL.

    Returns:
        A string, one of:
        * the first definition of the first meaning of the first entry, when
          the API answers with a non-empty list;
        * ``"Meaning not found."`` when that list lacks the expected fields;
        * the API's own ``message`` when it answers with an object that has
          one;
        * a generic "could not retrieve" sentence when the request fails,
          times out, returns non-JSON, or returns any other shape.

    The function never raises for network or parsing problems; they are
    reported through the returned string.
    """
    unavailable = "Could not retrieve meaning. Check your internet connection."
    not_found = "Meaning not found."

    # Encode everything (safe='') so spaces, '/', '?' or '#' typed by the user
    # cannot change the path or query of the request.
    url = f"https://api.dictionaryapi.dev/api/v2/entries/en/{quote(keyword, safe='')}"

    try:
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.get(url, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError):
        # Connection errors, timeouts and non-JSON bodies all end up here.
        return unavailable

    if isinstance(data, list) and data:
        try:
            definitions = data[0]['meanings'][0]['definitions']
            return definitions[0]['definition'] if definitions else not_found
        except (IndexError, KeyError, TypeError):
            return not_found

    if isinstance(data, dict) and 'message' in data:
        return data['message']  # Error payload returned by the API

    return unavailable
# ---- Streamlit page -------------------------------------------------------
st.title("OCR Web Application")

# Upload widget restricted to JPEG/PNG; the rest of the page is rendered only
# once a file has been provided.
uploaded_file = st.file_uploader("Upload an image file (JPEG, PNG)", type=["jpg", "jpeg", "png"])
if uploaded_file is not None:
    # Show the uploaded picture.
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # OCR the picture and show the raw result.
    extracted_text = extract_text_from_image(image)
    st.subheader("Extracted Text:")
    st.text(extracted_text)

    # Optional keyword search over the OCR result (case-insensitive test).
    search_keyword = st.text_input("Enter keyword to search in the extracted text:")
    if search_keyword:
        keyword_found = search_keyword.lower() in extracted_text.lower()
        if not keyword_found:
            st.subheader("Search Results:")
            st.write("No matches found.")
        else:
            # Render the text as HTML so the highlight markup takes effect.
            highlighted_text = highlight_text(extracted_text, search_keyword)
            st.subheader("Search Results:")
            st.markdown(highlighted_text, unsafe_allow_html=True)

            # Dictionary lookup is done on the lower-cased keyword.
            meaning = get_keyword_meaning(search_keyword.lower())
            st.subheader(f"Meaning of '{search_keyword}':")
            st.write(meaning)