PRIYANSHUDHAKED committed on
Commit
3af70ed
·
verified ·
1 Parent(s): 858b316

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -26
app.py CHANGED
@@ -6,38 +6,74 @@ from PIL import Image
6
  import io
7
  import re
8
 
9
- # Function for OCR processing (similar to your existing code)
 
 
 
 
10
  def process_image(image_bytes):
11
- # Convert bytes to image and process
12
- image = Image.open(io.BytesIO(image_bytes))
13
- opencv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
14
- text = pytesseract.image_to_string(opencv_image)
15
- return text
16
 
17
- # Function for search and highlight (similar to your existing code)
18
  def search_and_highlight(full_text, keyword):
19
- # Implement search and highlighting logic here
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  # Streamlit app layout
22
  st.title("Image Text Search App")
 
23
  uploaded_file = st.file_uploader("Upload an Image", type="jpg,png")
24
 
25
  if uploaded_file is not None:
26
- image_bytes = uploaded_file.read()
27
- st.image(image_bytes)
28
-
29
- # Perform OCR
30
- extracted_text = process_image(image_bytes)
31
- st.write("Extracted Text:")
32
- st.write(extracted_text)
33
-
34
- # Search functionality
35
- search_keyword = st.text_input("Enter a keyword to search:")
36
- if search_keyword:
37
- results, highlighted_text = search_and_highlight(extracted_text, search_keyword)
38
- if results:
39
- st.write(f"Keyword '{search_keyword}' found in the extracted text:")
40
- for i, result in enumerate(results, 1):
41
- st.write(f"{i}. ...{result}...")
42
- else:
43
- st.write(f"Keyword '{search_keyword}' not found in the extracted text.")
 
6
  import io
7
  import re
8
 
9
+ # ANSI escape sequences for console output: start a yellow background, then reset formatting
10
+ YELLOW_HIGHLIGHT = '\033[43m'
11
+ RESET_COLOR = '\033[0m'
12
+
13
# Function for OCR processing
def process_image(image_bytes):
    """Run OCR on an encoded image and return the recognized text.

    Args:
        image_bytes: Raw bytes of an encoded image file (for example the
            contents of an uploaded JPG or PNG).

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    with Image.open(io.BytesIO(image_bytes)) as image:
        # Grayscale ("L") and palette ("P") images decode to 2-D arrays,
        # which cv2.cvtColor(..., COLOR_RGB2BGR) rejects. Normalizing to
        # three-channel RGB first makes every supported upload acceptable.
        rgb_pixels = np.array(image.convert("RGB"))

    opencv_image = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
    return pytesseract.image_to_string(opencv_image)
20
 
21
# Function for search and highlight
def search_and_highlight(full_text, keyword):
    """Find case-insensitive occurrences of ``keyword`` in ``full_text``.

    Args:
        full_text: The text to search.
        keyword: Literal string to look for; regex metacharacters are
            escaped, so it is never interpreted as a pattern.

    Returns:
        A ``(results, html_text)`` tuple.

        ``results`` has one snippet per match, in the order the matches
        occur in ``full_text``. Each snippet is the match plus up to 50
        characters of context on either side, with the match wrapped in
        ``YELLOW_HIGHLIGHT`` / ``RESET_COLOR``.

        ``html_text`` is ``full_text`` HTML-escaped, with every match
        wrapped in a yellow-background ``<span>``. When nothing matches
        (or ``keyword`` is empty) ``results`` is empty and ``html_text``
        is just the escaped text.
    """
    import html

    # An empty pattern matches at every position, which would yield one
    # meaningless "hit" per character; treat it as "nothing to search for".
    if not keyword:
        return [], html.escape(full_text, quote=False)

    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    text_length = len(full_text)

    results = []
    html_parts = []
    cursor = 0  # end of the previous match within full_text

    # Walk matches front to back so snippets come out in reading order, and
    # assemble the HTML from pieces instead of re-slicing the whole string
    # for every match.
    for match in pattern.finditer(full_text):
        start, end = match.span()
        context_start = max(0, start - 50)
        context_end = min(text_length, end + 50)

        # Highlight for console output
        results.append(
            full_text[context_start:start]
            + YELLOW_HIGHLIGHT + full_text[start:end] + RESET_COLOR
            + full_text[end:context_end]
        )

        # Highlight for HTML output; the text comes from OCR of an uploaded
        # image, so escape it before embedding it in markup.
        html_parts.append(html.escape(full_text[cursor:start], quote=False))
        html_parts.append(
            '<span style="background-color: yellow;">'
            + html.escape(full_text[start:end], quote=False)
            + '</span>'
        )
        cursor = end

    html_parts.append(html.escape(full_text[cursor:], quote=False))
    return results, "".join(html_parts)
55
 
56
  # Streamlit app layout
57
  st.title("Image Text Search App")
58
+
59
  uploaded_file = st.file_uploader("Upload an Image", type="jpg,png")
60
 
61
  if uploaded_file is not None:
62
+ image_bytes = uploaded_file.read()
63
+ st.image(image_bytes)
64
+
65
+ # Perform OCR
66
+ extracted_text = process_image(image_bytes)
67
+ st.write("Extracted Text:")
68
+ st.write(extracted_text)
69
+
70
+ # Search functionality
71
+ search_keyword = st.text_input("Enter a keyword to search:")
72
+ if search_keyword:
73
+ results, highlighted_text = search_and_highlight(extracted_text, search_keyword)
74
+ if results:
75
+ st.write(f"Keyword '{search_keyword}' found in the extracted text:")
76
+ for i, result in enumerate(results, 1):
77
+ st.write(f"{i}. ...{result}...")
78
+ else:
79
+ st.write(f"Keyword '{search_keyword}' not found in the extracted text.")