Spaces:
Build error
Build error
Upload app.py
Browse files
app.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from PIL import Image
|
3 |
+
import pytesseract
|
4 |
+
import requests
|
5 |
+
|
6 |
+
# Point pytesseract at the Windows installer's default Tesseract location,
# but only when that executable is really present. Forcing the path
# unconditionally breaks every other environment (Linux containers, macOS,
# custom Windows installs); leaving tesseract_cmd untouched there lets
# pytesseract fall back to its own default lookup of the `tesseract` binary.
import os

TESSERACT_WINDOWS_PATH = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.isfile(TESSERACT_WINDOWS_PATH):
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_WINDOWS_PATH
|
8 |
+
|
9 |
+
# Function to extract text from the image using Tesseract
|
10 |
+
def extract_text_from_image(image):
    """Run Tesseract OCR on *image* and return the recognised text.

    The image is handed to ``pytesseract.image_to_string`` with the
    ``eng+hin`` language setting, and that call's result is returned
    unchanged.
    """
    return pytesseract.image_to_string(image, lang='eng+hin')
|
13 |
+
|
14 |
+
# Function to highlight the keyword in yellow and bold in the extracted text
|
15 |
+
def highlight_text(text, keyword):
    """Return *text* as an HTML fragment with *keyword* highlighted.

    Each line of *text* is processed separately and the lines are joined
    with ``<br>``. Every occurrence of *keyword* is wrapped in a yellow,
    bold ``<span>``.

    Matching is case-insensitive and the matched text keeps its original
    casing, so a search for ``"hello"`` highlights ``"Hello"``. The keyword
    is treated as literal text, not as a regular expression.

    All text taken from *text* is HTML-escaped before being placed in the
    fragment, because the result is meant to be rendered as raw HTML and the
    input is untrusted (OCR output / user input).

    An empty *keyword* highlights nothing; the escaped text is returned.
    """
    # Function-scope imports keep this block self-contained.
    import html
    import re

    def _escape(fragment):
        # Text-node escaping only; quotes are harmless outside attributes.
        return html.escape(fragment, quote=False)

    lines = text.splitlines()
    if not keyword:
        return "<br>".join(_escape(line) for line in lines)

    # The capture group makes re.split keep the matched text, so the pieces
    # alternate: non-match, match, non-match, ...
    pattern = re.compile(f"({re.escape(keyword)})", re.IGNORECASE)
    span_open = "<span style='background-color: yellow; font-weight: bold;'>"

    highlighted_lines = []
    for line in lines:
        pieces = pattern.split(line)
        highlighted_lines.append("".join(
            f"{span_open}{_escape(piece)}</span>" if index % 2 else _escape(piece)
            for index, piece in enumerate(pieces)
        ))
    return "<br>".join(highlighted_lines)
|
24 |
+
|
25 |
+
# Function to get the meaning of the searched keyword using the Dictionary API
|
26 |
+
def get_keyword_meaning(keyword):
    """Look up *keyword* in the Dictionary API and return a display string.

    Sends a GET request to
    ``https://api.dictionaryapi.dev/api/v2/entries/en/<keyword>`` and
    interprets the JSON reply:

    * a non-empty list -> the first definition of the first meaning of the
      first entry, or ``"Meaning not found."`` if that path is missing;
    * a dict with a ``'message'`` key -> that message (the API's error text);
    * anything else, or a failed request / undecodable body ->
      ``"Could not retrieve meaning. Check your internet connection."``.

    The function always returns a string and does not raise on network
    failures.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    unavailable = "Could not retrieve meaning. Check your internet connection."
    not_found = "Meaning not found."

    # Percent-encode the keyword so spaces, slashes, '?' and '#' cannot
    # change the request path.
    url = (
        "https://api.dictionaryapi.dev/api/v2/entries/en/"
        f"{quote(str(keyword), safe='')}"
    )

    try:
        # A timeout stops a stalled connection from hanging the app forever.
        data = requests.get(url, timeout=10).json()
    except (requests.RequestException, ValueError):
        # Connection errors, timeouts, or a body that is not valid JSON.
        return unavailable

    if isinstance(data, list) and data:
        try:
            definitions = data[0]['meanings'][0]['definitions']
            return definitions[0]['definition'] if definitions else not_found
        except (IndexError, KeyError, TypeError):
            # TypeError covers entries that are not dicts/lists as expected.
            return not_found

    if isinstance(data, dict) and 'message' in data:
        return data['message']  # For error messages

    return unavailable
|
50 |
+
|
51 |
+
# Streamlit application
|
52 |
+
st.title("OCR Web Application")

# Step 1: let the user pick an image (JPEG or PNG).
uploaded_file = st.file_uploader("Upload an image file (JPEG, PNG)", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Step 2: load the upload with PIL and show it back to the user.
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Step 3: OCR the image and show the raw text; st.text keeps line breaks.
    extracted_text = extract_text_from_image(image)
    st.subheader("Extracted Text:")
    st.text(extracted_text)

    # Step 4: ask for a keyword to look for in the OCR output.
    search_keyword = st.text_input("Enter keyword to search in the extracted text:")

    if search_keyword:
        # The presence check ignores case.
        keyword_present = search_keyword.lower() in extracted_text.lower()

        if not keyword_present:
            st.subheader("Search Results:")
            st.write("No matches found.")
        else:
            # Build the highlighted HTML, then render it as raw HTML so the
            # yellow/bold styling is applied.
            highlighted_text = highlight_text(extracted_text, search_keyword)
            st.subheader("Search Results:")
            st.markdown(highlighted_text, unsafe_allow_html=True)

            # Step 5: fetch and show the dictionary meaning of the keyword.
            meaning = get_keyword_meaning(search_keyword.lower())
            st.subheader(f"Meaning of '{search_keyword}':")
            st.write(meaning)
|