PRIYANSHUDHAKED committed on
Commit
e50af9e
·
verified ·
1 Parent(s): 806527c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -79
app.py CHANGED
@@ -1,79 +1,54 @@
1
- import streamlit as st
2
- import cv2
3
- import pytesseract
4
- import numpy as np
5
- from PIL import Image
6
- import io
7
- import re
8
-
9
# ANSI escape codes for console color.
# YELLOW_HIGHLIGHT switches the terminal to a yellow background (SGR 43);
# RESET_COLOR restores the default attributes (SGR 0). They only render in
# ANSI-aware terminals.
YELLOW_HIGHLIGHT = '\033[43m'
RESET_COLOR = '\033[0m'
12
-
13
- # Function for OCR processing
14
def process_image(image_bytes):
    """Run Tesseract OCR on an encoded image and return the recognised text.

    Args:
        image_bytes: Raw bytes of an image file that Pillow can decode.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    # Open inside a context manager so Pillow releases its handle on the
    # in-memory file as soon as the pixels have been copied out.
    with Image.open(io.BytesIO(image_bytes)) as image:
        # Normalise to 3-channel RGB. Grayscale and palette images become
        # 2-D arrays, which cv2.cvtColor(..., COLOR_RGB2BGR) rejects.
        rgb_image = image.convert("RGB")

    opencv_image = cv2.cvtColor(np.array(rgb_image), cv2.COLOR_RGB2BGR)
    return pytesseract.image_to_string(opencv_image)
20
-
21
- # Function for search and highlight
22
def search_and_highlight(full_text, keyword):
    """Find every case-insensitive occurrence of ``keyword`` in ``full_text``.

    Args:
        full_text: Text to search (the OCR output in this app).
        keyword: Literal search term; regex metacharacters are escaped.

    Returns:
        A ``(results, html_text)`` tuple.

        ``results`` has one snippet per match, in document order. Each
        snippet shows up to 50 characters of context on either side, with the
        match wrapped in ``YELLOW_HIGHLIGHT`` / ``RESET_COLOR``.

        ``html_text`` is the whole text, HTML-escaped, with each match
        wrapped in a yellow-background ``<span>``. If the keyword is empty or
        does not occur, ``results`` is empty and ``html_text`` is just the
        escaped text.
    """
    # Local import keeps this function self-contained; the module's import
    # block does not bring in ``html``.
    import html

    # An empty pattern matches at every position, which would yield one
    # meaningless "hit" per character; treat it as "nothing to search for".
    if not keyword:
        return [], html.escape(full_text, quote=False)

    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
    open_tag = '<span style="background-color: yellow;">'
    close_tag = '</span>'

    results = []
    html_parts = []
    cursor = 0  # end of the previous match within full_text

    # One forward pass builds both outputs. Assembling the HTML from slices
    # of the original text means no index bookkeeping after inserting tags,
    # and the snippets come out first-match-first.
    for match in pattern.finditer(full_text):
        start, end = match.span()
        context_start = max(0, start - 50)
        context_end = min(len(full_text), end + 50)

        # Snippet for console output.
        results.append(
            full_text[context_start:start]
            + YELLOW_HIGHLIGHT + full_text[start:end] + RESET_COLOR
            + full_text[end:context_end]
        )

        # Escape the source text so characters such as "<" or "&" picked up
        # by OCR cannot be interpreted as markup when the HTML is rendered.
        html_parts.append(html.escape(full_text[cursor:start], quote=False))
        html_parts.append(
            open_tag + html.escape(full_text[start:end], quote=False) + close_tag
        )
        cursor = end

    html_parts.append(html.escape(full_text[cursor:], quote=False))
    return results, "".join(html_parts)
55
-
56
# Streamlit app layout: upload an image, OCR it, then search the text.
st.title("Image Text Search App")

uploaded_file = st.file_uploader("Upload an Image (JPG or PNG)", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    image_bytes = uploaded_file.read()
    st.image(image_bytes)

    # Perform OCR
    extracted_text = process_image(image_bytes)
    st.write("Extracted Text:")
    st.write(extracted_text)

    # Search functionality
    search_keyword = st.text_input("Enter a keyword to search:")
    if search_keyword:
        results, highlighted_text = search_and_highlight(extracted_text, search_keyword)
        if results:
            st.write(f"Keyword '{search_keyword}' found in the extracted text:")
            for i, result in enumerate(results, 1):
                # The snippets are marked up with ANSI terminal colour codes,
                # which a browser shows as stray "[43m" / "[0m" text. Swap
                # them for Markdown bold so the match stands out on the page.
                snippet = result.replace(YELLOW_HIGHLIGHT, "**").replace(RESET_COLOR, "**")
                st.write(f"{i}. ...{snippet}...")
        else:
            st.write(f"Keyword '{search_keyword}' not found in the extracted text.")
 
1
+ # app.py
2
import html
import re

import cv2
import numpy as np
import pytesseract
import streamlit as st
from PIL import Image
8
+
9
# Set the title of the webpage
st.title("OCR Text Extraction Tool")

# Uploading an image
uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Only decoding and OCR sit inside the try block, so a failure there is
    # reported as an image-processing error while problems in the search UI
    # below are not mislabelled.
    try:
        # Convert the uploaded file content to an image
        image = Image.open(uploaded_file)

        # Display the image
        st.image(image, caption='Uploaded Image', use_column_width=True)

        # Convert PIL Image to OpenCV format. Normalise to RGB first:
        # grayscale and palette images become 2-D arrays, which
        # cv2.cvtColor(..., COLOR_RGB2BGR) rejects.
        opencv_image = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)

        # Perform OCR
        text = pytesseract.image_to_string(opencv_image)
    except Exception as e:
        st.error(f"An error occurred while processing the image: {e}")
    else:
        st.subheader("Extracted Text:")
        st.write(text)

        # Search functionality
        search_keyword = st.text_input("Enter a keyword to search in the extracted text:")
        if search_keyword:
            pattern = re.compile(re.escape(search_keyword), re.IGNORECASE)
            matches = list(pattern.finditer(text))

            if matches:
                st.markdown("### Keyword Found:")
                for match in matches:
                    start, end = match.span()
                    # Show up to 50 characters of context on each side.
                    context_start = max(0, start - 50)
                    context_end = min(len(text), end + 50)

                    # Escape the OCR text so that only our own <span> is
                    # treated as markup once HTML rendering is enabled.
                    highlighted_text = (
                        html.escape(text[context_start:start])
                        + "<span style='background-color: yellow;'>"
                        + html.escape(text[start:end])
                        + "</span>"
                        + html.escape(text[end:context_end])
                    )
                    # st.markdown escapes HTML by default, which would print
                    # the <span> tag literally instead of highlighting.
                    st.markdown(f"...{highlighted_text}...", unsafe_allow_html=True)
            else:
                st.write(f"Keyword '{search_keyword}' not found in the extracted text.")