snehareddy23 committed on
Commit
0d66a88
·
verified ·
1 Parent(s): 41493e2

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -0
app.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import pytesseract
4
+ import requests
5
+
6
# Point pytesseract at the Windows install location only when that binary is
# actually present. On any other host the assignment is skipped and
# pytesseract keeps its default command ("tesseract" resolved from PATH),
# instead of failing on a path that does not exist.
import os

_WINDOWS_TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
if os.path.isfile(_WINDOWS_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_CMD
8
+
9
def extract_text_from_image(image):
    """Run Tesseract OCR on ``image`` and return the recognised text.

    The recognition language is ``'eng+hin'`` (English plus Hindi).
    The result of ``pytesseract.image_to_string`` is returned unchanged.
    """
    return pytesseract.image_to_string(image, lang='eng+hin')
13
+
14
def highlight_text(text, keyword):
    """Return ``text`` as HTML with every occurrence of ``keyword`` highlighted.

    Matching is case-insensitive, and each match keeps the casing it has in
    ``text``. Matches are wrapped in a yellow, bold ``<span>``; lines are
    joined with ``<br>``. All text taken from ``text`` is HTML-escaped, so
    markup-like characters in it are shown literally rather than interpreted
    when the result is rendered as HTML.

    Args:
        text: The text to search; may contain several lines.
        keyword: The literal string to highlight. An empty keyword
            highlights nothing.

    Returns:
        An HTML fragment as a string.
    """
    import html
    import re

    lines = text.splitlines()
    if not keyword:
        # str.replace("", ...) would insert a span between every character.
        return "<br>".join(html.escape(line) for line in lines)

    # re.escape makes the keyword match literally (e.g. "+" or "." in input).
    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    span_open = "<span style='background-color: yellow; font-weight: bold;'>"

    def mark(line):
        # Escape the unmatched and matched pieces separately so the keyword
        # can never match inside an entity produced by escaping.
        pieces = []
        cursor = 0
        for match in pattern.finditer(line):
            pieces.append(html.escape(line[cursor:match.start()]))
            pieces.append(f"{span_open}{html.escape(match.group(0))}</span>")
            cursor = match.end()
        pieces.append(html.escape(line[cursor:]))
        return "".join(pieces)

    return "<br>".join(mark(line) for line in lines)
24
+
25
def get_keyword_meaning(keyword):
    """Look up ``keyword`` with the dictionaryapi.dev API and return a meaning.

    Args:
        keyword: The word to look up.

    Returns:
        A string, one of:
        - the first definition of the first meaning of the first entry,
        - "Meaning not found." when the keyword is blank or the response
          lacks a definition in the expected place,
        - the API's own ``message`` text when it answers with an error object,
        - a generic connection-failure message when the request fails or the
          body is not valid JSON.
    """
    from urllib.parse import quote

    not_found = "Meaning not found."
    unavailable = "Could not retrieve meaning. Check your internet connection."

    if not keyword or not keyword.strip():
        # Nothing to look up; avoid a pointless network round trip.
        return not_found

    # Percent-encode the keyword so spaces, "/", "?" and similar characters
    # stay inside the final path segment of the URL.
    url = f"https://api.dictionaryapi.dev/api/v2/entries/en/{quote(keyword, safe='')}"

    try:
        # Without a timeout, a stalled connection would block indefinitely.
        response = requests.get(url, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure, timeout, or a non-JSON body.
        return unavailable

    if isinstance(data, list) and data:
        try:
            definitions = data[0]['meanings'][0]['definitions']
            return definitions[0]['definition'] if definitions else not_found
        except (IndexError, KeyError, TypeError):
            return not_found

    if isinstance(data, dict) and 'message' in data:
        return data['message']  # For error messages

    return unavailable
50
+
51
# Streamlit application: upload an image, OCR it, then search the extracted
# text for a keyword and show that keyword's dictionary meaning.
st.title("OCR Web Application")

# Step 1: Upload image file (JPEG, PNG)
uploaded_file = st.file_uploader("Upload an image file (JPEG, PNG)", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Step 2: Open and display the uploaded image
    image = Image.open(uploaded_file)
    # NOTE(review): newer Streamlit releases deprecate use_column_width in
    # favour of use_container_width - confirm the deployed version before
    # switching.
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Step 3: Extract text from the image
    extracted_text = extract_text_from_image(image)
    st.subheader("Extracted Text:")

    # st.text keeps the OCR output's line breaks and spacing as-is
    st.text(extracted_text)

    # Step 4: Search for keywords in the extracted text.
    # Strip surrounding whitespace so blank or padded input neither "matches"
    # the spaces in the text nor triggers a dictionary lookup for nothing.
    search_keyword = st.text_input("Enter keyword to search in the extracted text:").strip()

    if search_keyword:
        if search_keyword.lower() in extracted_text.lower():
            # Highlight matching keywords in the extracted text
            highlighted_text = highlight_text(extracted_text, search_keyword)
            st.subheader("Search Results:")

            # Render as HTML so the background colour and bold styling apply
            st.markdown(highlighted_text, unsafe_allow_html=True)

            # Step 5: Get and display the meaning of the searched keyword
            meaning = get_keyword_meaning(search_keyword.lower())
            st.subheader(f"Meaning of '{search_keyword}':")
            st.write(meaning)
        else:
            # No matches found message
            st.subheader("Search Results:")
            st.write("No matches found.")