Spaces:

snehareddy23
/

OCR-Web-Application

Build error

App Files Files Community

OCR-Web-Application / app.py

snehareddy23

Upload app.py

0d66a88 verified 10 months ago

raw

history blame contribute delete

3.6 kB

	import os

	import pytesseract
	import requests
	import streamlit as st
	from PIL import Image

	# Point pytesseract at the Windows install of Tesseract-OCR only when that
	# file actually exists. On any other machine (for example a Linux container)
	# pytesseract's default command is left untouched so the `tesseract` binary
	# is looked up on PATH instead of failing on a path that is not there.
	_WINDOWS_TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
	if os.path.isfile(_WINDOWS_TESSERACT_CMD):
	    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_CMD

	# Function to extract text from the image using Tesseract
	def extract_text_from_image(image):
	    """Run Tesseract OCR over ``image`` and return the recognised text.

	    Args:
	        image: The image object handed straight to
	            ``pytesseract.image_to_string``.

	    Returns:
	        The text produced by Tesseract using the ``eng+hin`` language
	        setting.
	    """
	    return pytesseract.image_to_string(image, lang='eng+hin')

	# Function to highlight the keyword in yellow and bold in the extracted text
	def highlight_text(text, keyword):
	    """Return ``text`` as HTML with every occurrence of ``keyword`` highlighted.

	    Matching is case-insensitive and the matched text keeps the casing it has
	    in ``text``. Each match is wrapped in a yellow, bold ``<span>``. All other
	    text is HTML-escaped so that characters such as ``<`` or ``&`` are shown
	    literally rather than interpreted as markup. Lines are joined with
	    ``<br>``.

	    Args:
	        text: The plain text to render.
	        keyword: The literal string to highlight. An empty keyword
	            highlights nothing.

	    Returns:
	        An HTML fragment as a single string.
	    """
	    import html
	    import re

	    open_tag = "<span style='background-color: yellow; font-weight: bold;'>"
	    close_tag = "</span>"
	    lines = text.splitlines()

	    # An empty pattern would match between every character, so just escape.
	    if not keyword:
	        return "<br>".join(html.escape(line, quote=False) for line in lines)

	    # re.escape makes the keyword a literal; the single capture group makes
	    # re.split keep the matched text at the odd indices of its result.
	    pattern = re.compile(f"({re.escape(keyword)})", re.IGNORECASE)

	    highlighted_lines = []
	    for line in lines:
	        rendered = []
	        for index, piece in enumerate(pattern.split(line)):
	            safe_piece = html.escape(piece, quote=False)
	            if index % 2:
	                rendered.append(f"{open_tag}{safe_piece}{close_tag}")
	            else:
	                rendered.append(safe_piece)
	        highlighted_lines.append("".join(rendered))

	    # Join the lines back into a single string
	    return "<br>".join(highlighted_lines)

	# Function to get the meaning of the searched keyword using the Dictionary API
	def get_keyword_meaning(keyword):
	    """Look up ``keyword`` with the dictionaryapi.dev API and return a meaning.

	    Args:
	        keyword: The word to look up. It is percent-encoded before being
	            placed in the request URL.

	    Returns:
	        One of the following:
	        * the first definition of the first meaning of the first entry, when
	          the response is a non-empty list with that shape;
	        * ``"Meaning not found."`` when the list lacks that shape;
	        * the ``message`` value when the response is a dict containing one
	          (presumably the API's error text — confirm against the API docs);
	        * ``"Could not retrieve meaning. Check your internet connection."``
	          when the request fails, times out, returns a body that is not
	          JSON, or returns any other payload.
	    """
	    from urllib.parse import quote

	    not_found = "Meaning not found."
	    unavailable = "Could not retrieve meaning. Check your internet connection."

	    # safe='' also encodes '/', so the keyword cannot change the URL path.
	    url = f"https://api.dictionaryapi.dev/api/v2/entries/en/{quote(keyword, safe='')}"

	    try:
	        # A timeout stops a stalled connection from hanging the app.
	        response = requests.get(url, timeout=10)
	        data = response.json()
	    except (requests.RequestException, ValueError):
	        # RequestException covers connection errors and timeouts; ValueError
	        # covers a response body that cannot be decoded as JSON.
	        return unavailable

	    if isinstance(data, list) and data:
	        try:
	            definitions = data[0]['meanings'][0]['definitions']
	            return definitions[0]['definition'] if definitions else not_found
	        except (IndexError, KeyError, TypeError):
	            return not_found

	    if isinstance(data, dict) and 'message' in data:
	        return data['message']  # For error messages

	    return unavailable

	# Streamlit application
	st.title("OCR Web Application")

	# Step 1: Upload image file (JPEG, PNG)
	uploaded_file = st.file_uploader("Upload an image file (JPEG, PNG)", type=["jpg", "jpeg", "png"])

	if uploaded_file is not None:
	    # Step 2: Open and display the uploaded image
	    image = Image.open(uploaded_file)
	    # NOTE(review): newer Streamlit releases appear to deprecate
	    # use_column_width in favour of use_container_width — confirm against
	    # the Streamlit version this app is deployed with before changing it.
	    st.image(image, caption="Uploaded Image", use_column_width=True)

	    # Step 3: Extract text from the image
	    extracted_text = extract_text_from_image(image)
	    st.subheader("Extracted Text:")

	    # st.text shows the extracted text as-is, keeping its line breaks
	    st.text(extracted_text)

	    # Step 4: Search for keywords in the extracted text.
	    # Surrounding whitespace is stripped so that a blank or padded entry is
	    # not treated as a real keyword (and is not sent to the dictionary API).
	    search_keyword = st.text_input("Enter keyword to search in the extracted text:").strip()

	    if search_keyword:
	        if search_keyword.lower() in extracted_text.lower():
	            # Highlight matching keywords in the extracted text
	            highlighted_text = highlight_text(extracted_text, search_keyword)
	            st.subheader("Search Results:")

	            # Render as HTML so the background colour and bold styling apply
	            st.markdown(highlighted_text, unsafe_allow_html=True)

	            # Step 5: Get and display the meaning of the searched keyword
	            meaning = get_keyword_meaning(search_keyword.lower())
	            st.subheader(f"Meaning of '{search_keyword}':")
	            st.write(meaning)
	        else:
	            # No matches found message
	            st.subheader("Search Results:")
	            st.write("No matches found.")