# Spaces: Build error
import html
import os
import re
from urllib.parse import quote

import pytesseract
import requests
import streamlit as st
from PIL import Image
# Location of the Tesseract-OCR engine for a default Windows install.
# The override is applied only when that executable actually exists; on any
# other host pytesseract keeps its own default command, so the script does
# not break on machines without this exact install path.
_WINDOWS_TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.isfile(_WINDOWS_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_CMD
def extract_text_from_image(image):
    """Run Tesseract OCR on *image* and return the recognised text.

    The image is passed straight to ``pytesseract.image_to_string`` with the
    ``'eng+hin'`` language setting.
    """
    return pytesseract.image_to_string(image, lang='eng+hin')
def highlight_text(text, keyword):
    """Return *text* as HTML with every occurrence of *keyword* highlighted.

    Each line of *text* is processed separately and the lines are joined with
    ``<br>``. Occurrences of *keyword* are wrapped in a yellow, bold ``<span>``.

    Matching is case-insensitive and the original casing of each match is
    kept in the output. All text is HTML-escaped before being placed in the
    markup, because the result is meant to be rendered as raw HTML. An empty
    *keyword* highlights nothing.

    Args:
        text: The text to search (may contain several lines).
        keyword: The literal string to highlight.

    Returns:
        An HTML string with ``<br>`` between the original lines.
    """
    lines = text.splitlines()

    # str.replace("", ...) would insert markup between every character, so an
    # empty keyword is treated as "nothing to highlight".
    if not keyword:
        return "<br>".join(html.escape(line, quote=False) for line in lines)

    # The capturing group makes split() return the matched pieces as well:
    # even indices are plain text, odd indices are keyword matches.
    pattern = re.compile(f"({re.escape(keyword)})", re.IGNORECASE)

    highlighted_lines = []
    for line in lines:
        pieces = []
        for index, piece in enumerate(pattern.split(line)):
            # Escape after splitting so the keyword can never match inside an
            # HTML entity produced by the escaping itself.
            escaped = html.escape(piece, quote=False)
            if index % 2:
                escaped = (
                    "<span style='background-color: yellow; font-weight: bold;'>"
                    f"{escaped}</span>"
                )
            pieces.append(escaped)
        highlighted_lines.append("".join(pieces))

    # Join the lines back into a single string
    return "<br>".join(highlighted_lines)
def get_keyword_meaning(keyword):
    """Look up *keyword* with the Dictionary API and return a meaning string.

    The function always returns a string suitable for display:

    * the first definition of the first meaning when the response is a
      non-empty list with the expected structure;
    * ``"Meaning not found."`` when the list does not have that structure;
    * the response's ``'message'`` value when the response is a dict that
      carries one;
    * a connectivity hint when the request fails, times out, does not
      return JSON, or returns anything else.

    Args:
        keyword: The word to look up.

    Returns:
        The definition, or a human-readable explanation of why none is
        available.
    """
    unavailable = "Could not retrieve meaning. Check your internet connection."

    # Percent-encode the word so spaces, slashes, etc. cannot alter the URL path.
    url = f"https://api.dictionaryapi.dev/api/v2/entries/en/{quote(keyword, safe='')}"

    try:
        # A timeout keeps the app from hanging forever on a stalled connection.
        response = requests.get(url, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON.
        return unavailable

    if isinstance(data, list) and data:
        try:
            definitions = data[0]['meanings'][0]['definitions']
            return definitions[0]['definition'] if definitions else "Meaning not found."
        except (IndexError, KeyError, TypeError):
            # The payload is a list but not shaped the way this code expects.
            return "Meaning not found."

    if isinstance(data, dict) and 'message' in data:
        return data['message']  # For error messages

    return unavailable
# Streamlit application: upload an image, OCR it, then search the extracted text.
st.title("OCR Web Application")

# Step 1: ask the user for an image file (JPEG, PNG).
uploaded_file = st.file_uploader(
    "Upload an image file (JPEG, PNG)",
    type=["jpg", "jpeg", "png"],
)

if uploaded_file is not None:
    # Step 2: open the upload with PIL and show it.
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Step 3: run OCR and show the raw result; st.text is used so the
    # extracted text is displayed as-is with its line breaks.
    extracted_text = extract_text_from_image(image)
    st.subheader("Extracted Text:")
    st.text(extracted_text)

    # Step 4: optional keyword search within the extracted text.
    search_keyword = st.text_input("Enter keyword to search in the extracted text:")
    if search_keyword:
        # Both outcomes are reported under the same heading.
        st.subheader("Search Results:")
        if search_keyword.lower() not in extracted_text.lower():
            st.write("No matches found.")
        else:
            # Render the highlighted text as HTML so the background colour
            # and bold styling are applied.
            highlighted_text = highlight_text(extracted_text, search_keyword)
            st.markdown(highlighted_text, unsafe_allow_html=True)

            # Step 5: look up and display the meaning of the keyword.
            meaning = get_keyword_meaning(search_keyword.lower())
            st.subheader(f"Meaning of '{search_keyword}':")
            st.write(meaning)