PRIYANSHUDHAKED committed on
Commit
858b316
·
verified ·
1 Parent(s): cdadee9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -49
app.py CHANGED
@@ -1,54 +1,43 @@
1
- # app.py
2
- import streamlit as st
3
- import cv2
4
- import numpy as np
5
- import pytesseract
6
- from PIL import Image
7
- import re
8
 
9
- # Set the title of the webpage
10
- st.title("OCR Text Extraction Tool")
 
 
 
 
 
11
 
12
- # Uploading an image
13
- uploaded_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])
 
14
 
15
- if uploaded_file is not None:
16
- # Convert the uploaded file content to an image
17
- image = Image.open(uploaded_file)
18
-
19
- # Convert PIL Image to OpenCV format
20
- opencv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
21
 
22
- # Display the image
23
- st.image(image, caption='Uploaded Image', use_column_width=True)
24
-
25
- try:
26
- # Perform OCR
27
- text = pytesseract.image_to_string(opencv_image)
28
-
29
- st.subheader("Extracted Text:")
30
- st.write(text)
31
 
32
- # Search functionality
33
- search_keyword = st.text_input("Enter a keyword to search in the extracted text:")
34
- if search_keyword:
35
- pattern = re.compile(re.escape(search_keyword), re.IGNORECASE)
36
- matches = list(pattern.finditer(text))
37
-
38
- if matches:
39
- st.markdown("### Keyword Found:")
40
- for match in matches:
41
- start, end = match.span()
42
- context_start = max(0, start - 50)
43
- context_end = min(len(text), end + 50)
44
- context = text[context_start:context_end]
45
- highlighted_text = (
46
- context[:start-context_start] +
47
- f"<span style='background-color: yellow;'>{context[start-context_start:end-context_start]}</span>" +
48
- context[end-context_start:]
49
- )
50
- st.markdown(f"...{highlighted_text}...")
51
- else:
52
- st.write(f"Keyword '{search_keyword}' not found in the extracted text.")
53
- except Exception as e:
54
- st.error(f"An error occurred while processing the image: {str(e)}")
 
1
+ import streamlit as st
2
+ import cv2
3
+ import pytesseract
4
+ import numpy as np
5
+ from PIL import Image
6
+ import io
7
+ import re
8
 
9
+ # Function for OCR processing (similar to your existing code)
10
+ def process_image(image_bytes):
11
+ # Convert bytes to image and process
12
+ image = Image.open(io.BytesIO(image_bytes))
13
+ opencv_image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
14
+ text = pytesseract.image_to_string(opencv_image)
15
+ return text
16
 
17
+ # Function for search and highlight (similar to your existing code)
18
+ def search_and_highlight(full_text, keyword):
19
+ # Implement search and highlighting logic here
20
 
21
+ # Streamlit app layout
22
+ st.title("Image Text Search App")
23
+ uploaded_file = st.file_uploader("Upload an Image", type="jpg,png")
 
 
 
24
 
25
+ if uploaded_file is not None:
26
+ image_bytes = uploaded_file.read()
27
+ st.image(image_bytes)
 
 
 
 
 
 
28
 
29
+ # Perform OCR
30
+ extracted_text = process_image(image_bytes)
31
+ st.write("Extracted Text:")
32
+ st.write(extracted_text)
33
+
34
+ # Search functionality
35
+ search_keyword = st.text_input("Enter a keyword to search:")
36
+ if search_keyword:
37
+ results, highlighted_text = search_and_highlight(extracted_text, search_keyword)
38
+ if results:
39
+ st.write(f"Keyword '{search_keyword}' found in the extracted text:")
40
+ for i, result in enumerate(results, 1):
41
+ st.write(f"{i}. ...{result}...")
42
+ else:
43
+ st.write(f"Keyword '{search_keyword}' not found in the extracted text.")