Spaces:
Running
Running
Update ocr_engine.py
Browse files- ocr_engine.py +55 -34
ocr_engine.py
CHANGED
@@ -19,40 +19,58 @@ def detect_roi(img):
|
|
19 |
"""Detect and crop the region of interest (likely the digital display)"""
|
20 |
try:
|
21 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
22 |
-
#
|
23 |
brightness = estimate_brightness(img)
|
24 |
-
thresh_value =
|
25 |
_, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
|
26 |
-
#
|
27 |
-
kernel = np.ones((
|
28 |
-
dilated = cv2.dilate(thresh, kernel, iterations=
|
29 |
# Find contours
|
30 |
contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
31 |
if contours:
|
32 |
-
#
|
33 |
valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
|
34 |
if valid_contours:
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
return img
|
43 |
except Exception as e:
|
44 |
logging.error(f"ROI detection failed: {str(e)}")
|
45 |
return img
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
def enhance_image(img, mode="standard"):
|
48 |
"""Enhance image with different modes for multi-scale processing"""
|
49 |
try:
|
50 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
51 |
|
52 |
if mode == "seven_segment":
|
53 |
-
#
|
54 |
-
|
|
|
|
|
|
|
|
|
55 |
_, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
|
|
56 |
elif mode == "high_contrast":
|
57 |
denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
|
58 |
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
@@ -66,7 +84,7 @@ def enhance_image(img, mode="standard"):
|
|
66 |
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
67 |
thresh = clahe.apply(denoised)
|
68 |
|
69 |
-
if mode
|
70 |
thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
71 |
cv2.THRESH_BINARY, 11, 2)
|
72 |
|
@@ -74,21 +92,22 @@ def enhance_image(img, mode="standard"):
|
|
74 |
kernel = np.ones((3, 3), np.uint8)
|
75 |
morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
|
76 |
|
77 |
-
#
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
82 |
|
83 |
# Dynamic resizing
|
84 |
-
h, w =
|
85 |
target_size = 800
|
86 |
scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
|
87 |
if scale_factor != 1.0:
|
88 |
-
|
89 |
-
|
90 |
|
91 |
-
return
|
92 |
except Exception as e:
|
93 |
logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
|
94 |
return img
|
@@ -100,18 +119,19 @@ def extract_weight_from_image(pil_img):
|
|
100 |
|
101 |
# Estimate brightness for adaptive thresholding
|
102 |
brightness = estimate_brightness(img)
|
103 |
-
conf_threshold = 0.
|
104 |
|
105 |
# Detect ROI
|
106 |
roi_img = detect_roi(img)
|
107 |
|
108 |
# Process multiple image versions
|
109 |
images_to_process = [
|
110 |
-
("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.
|
111 |
-
("
|
112 |
-
("
|
113 |
-
("
|
114 |
-
("
|
|
|
115 |
]
|
116 |
|
117 |
best_weight = None
|
@@ -123,6 +143,8 @@ def extract_weight_from_image(pil_img):
|
|
123 |
results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
|
124 |
|
125 |
for (bbox, text, conf) in results:
|
|
|
|
|
126 |
original_text = text
|
127 |
text = text.lower().strip()
|
128 |
|
@@ -135,7 +157,6 @@ def extract_weight_from_image(pil_img):
|
|
135 |
text = text.replace("b", "8").replace("B", "8")
|
136 |
text = text.replace("z", "2").replace("Z", "2")
|
137 |
text = text.replace("q", "9").replace("Q", "9")
|
138 |
-
text = text.replace("6", "2").replace("9", "2") # Specific correction for seven-segment
|
139 |
text = text.replace("kgs", "").replace("kg", "").replace("k", "")
|
140 |
text = re.sub(r"[^\d\.]", "", text)
|
141 |
|
@@ -145,8 +166,8 @@ def extract_weight_from_image(pil_img):
|
|
145 |
weight = float(text)
|
146 |
# Score based on realistic weight range (0.1–500 kg)
|
147 |
range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
|
148 |
-
#
|
149 |
-
digit_score = 1.
|
150 |
score = conf * range_score * digit_score
|
151 |
if score > best_score and conf > conf_threshold:
|
152 |
best_weight = text
|
|
|
def detect_roi(img):
    """Detect and crop the region of interest (likely the digital display).

    Binarizes the frame with a brightness-adaptive threshold, dilates the
    result so digit strokes merge into one blob, then scans the largest
    bright blobs for one with a display-like aspect ratio. Returns a padded
    crop of the first match, or the original image when nothing qualifies
    (or on any OpenCV error).
    """
    try:
        grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Brighter scenes get a stricter binarization cutoff.
        level = 220 if estimate_brightness(img) > 100 else 180
        _, binary = cv2.threshold(grayscale, level, 255, cv2.THRESH_BINARY)
        # Dilate aggressively so the separate digits fuse into a single blob.
        blob = cv2.dilate(binary, np.ones((9, 9), np.uint8), iterations=3)
        found, _ = cv2.findContours(blob, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # Drop speckle; only contours with a meaningful area are candidates.
        candidates = [c for c in found if cv2.contourArea(c) > 500]
        # Largest first — accept the first one shaped like a display panel.
        for candidate in sorted(candidates, key=cv2.contourArea, reverse=True):
            x, y, w, h = cv2.boundingRect(candidate)
            # Typical display: wider than tall (1.5–4.0) and not tiny.
            if 1.5 <= w / h <= 4.0 and w > 50 and h > 30:
                # Pad the crop by 40 px per side, clamped to the frame bounds.
                x, y = max(0, x - 40), max(0, y - 40)
                w = min(w + 80, img.shape[1] - x)
                h = min(h + 80, img.shape[0] - y)
                return img[y:y + h, x:x + w]
        return img
    except Exception as e:
        logging.error(f"ROI detection failed: {str(e)}")
        return img
|
46 |
|
def correct_seven_segment(text, bbox):
    """Correct common seven-segment misreads based on bounding box shape.

    Parameters
    ----------
    text : str
        Raw OCR text for one detection.
    bbox : sequence of four (x, y) corner points in EasyOCR order:
        top-left, top-right, bottom-right, bottom-left.

    Returns
    -------
    str
        ``text`` with every "6" replaced by "2" when the detection box is
        wide relative to its height (seven-segment "2" reads wider than
        "6"); otherwise ``text`` unchanged.
    """
    if "6" in text:
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
        width = abs(x2 - x1)  # top edge: top-left -> top-right
        # BUG FIX: height must span vertically (top-left -> bottom-left).
        # The previous abs(y2 - y1) compared the two *top* corners, which is
        # ~0 for an axis-aligned box, so the height > 0 guard kicked in,
        # aspect_ratio defaulted to 1.0 (> 0.5), and every "6" was
        # unconditionally rewritten to "2".
        height = abs(y4 - y1)
        aspect_ratio = width / height if height > 0 else 1.0
        # "2" typically has a more rectangular (wider) shape in seven-segment.
        if aspect_ratio > 0.5:  # threshold tuned for typical "2" vs "6" shapes
            text = text.replace("6", "2")
    return text
|
60 |
def enhance_image(img, mode="standard"):
|
61 |
"""Enhance image with different modes for multi-scale processing"""
|
62 |
try:
|
63 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
64 |
|
65 |
if mode == "seven_segment":
|
66 |
+
# Minimal preprocessing for seven-segment displays
|
67 |
+
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
68 |
+
return thresh
|
69 |
+
elif mode == "minimal":
|
70 |
+
# Very light preprocessing
|
71 |
+
denoised = cv2.GaussianBlur(gray, (3, 3), 0)
|
72 |
_, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
73 |
+
return thresh
|
74 |
elif mode == "high_contrast":
|
75 |
denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
|
76 |
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
|
|
84 |
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
85 |
thresh = clahe.apply(denoised)
|
86 |
|
87 |
+
if mode not in ["seven_segment", "minimal"]:
|
88 |
thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
89 |
cv2.THRESH_BINARY, 11, 2)
|
90 |
|
|
|
92 |
kernel = np.ones((3, 3), np.uint8)
|
93 |
morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
|
94 |
|
95 |
+
# Skip sharpening for seven-segment and minimal modes
|
96 |
+
if mode not in ["seven_segment", "minimal"]:
|
97 |
+
brightness = estimate_brightness(img)
|
98 |
+
sharpen_strength = 3 if brightness > 100 else 5
|
99 |
+
sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
|
100 |
+
morphed = cv2.filter2D(morphed, -1, sharpen_kernel)
|
101 |
|
102 |
# Dynamic resizing
|
103 |
+
h, w = morphed.shape
|
104 |
target_size = 800
|
105 |
scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
|
106 |
if scale_factor != 1.0:
|
107 |
+
morphed = cv2.resize(morphed, None, fx=scale_factor, fy=scale_factor,
|
108 |
+
interpolation=cv2.INTER_CUBIC if scale_factor > 1 else cv2.INTER_AREA)
|
109 |
|
110 |
+
return morphed
|
111 |
except Exception as e:
|
112 |
logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
|
113 |
return img
|
|
|
119 |
|
120 |
# Estimate brightness for adaptive thresholding
|
121 |
brightness = estimate_brightness(img)
|
122 |
+
conf_threshold = 0.7 if brightness > 100 else 0.5 # Stricter for bright displays
|
123 |
|
124 |
# Detect ROI
|
125 |
roi_img = detect_roi(img)
|
126 |
|
127 |
# Process multiple image versions
|
128 |
images_to_process = [
|
129 |
+
("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'}),
|
130 |
+
("minimal", enhance_image(roi_img, mode="minimal"), {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'}),
|
131 |
+
("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
|
132 |
+
("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
|
133 |
+
("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
|
134 |
+
("original", roi_img, {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'})
|
135 |
]
|
136 |
|
137 |
best_weight = None
|
|
|
143 |
results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
|
144 |
|
145 |
for (bbox, text, conf) in results:
|
146 |
+
# Apply seven-segment correction
|
147 |
+
text = correct_seven_segment(text, bbox)
|
148 |
original_text = text
|
149 |
text = text.lower().strip()
|
150 |
|
|
|
157 |
text = text.replace("b", "8").replace("B", "8")
|
158 |
text = text.replace("z", "2").replace("Z", "2")
|
159 |
text = text.replace("q", "9").replace("Q", "9")
|
|
|
160 |
text = text.replace("kgs", "").replace("kg", "").replace("k", "")
|
161 |
text = re.sub(r"[^\d\.]", "", text)
|
162 |
|
|
|
166 |
weight = float(text)
|
167 |
# Score based on realistic weight range (0.1–500 kg)
|
168 |
range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
|
169 |
+
# Strongly prefer two-digit weights for scales
|
170 |
+
digit_score = 1.5 if 10 <= weight < 100 else 1.0
|
171 |
score = conf * range_score * digit_score
|
172 |
if score > best_score and conf > conf_threshold:
|
173 |
best_weight = text
|