Spaces:
Running
Running
Update ocr_engine.py
Browse files- ocr_engine.py +55 -31
ocr_engine.py
CHANGED
@@ -21,7 +21,7 @@ def detect_roi(img):
|
|
21 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
22 |
# Stricter threshold for bright areas
|
23 |
brightness = estimate_brightness(img)
|
24 |
-
thresh_value =
|
25 |
_, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
|
26 |
# Morphological operations to connect digits
|
27 |
kernel = np.ones((9, 9), np.uint8)
|
@@ -29,13 +29,13 @@ def detect_roi(img):
|
|
29 |
# Find contours
|
30 |
contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
31 |
if contours:
|
32 |
-
# Filter contours by size and aspect ratio
|
33 |
valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
|
34 |
if valid_contours:
|
35 |
for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
|
36 |
x, y, w, h = cv2.boundingRect(contour)
|
37 |
aspect_ratio = w / h
|
38 |
-
if 1.5 <= aspect_ratio <= 4.0 and w > 50 and h > 30:
|
39 |
x, y = max(0, x-40), max(0, y-40)
|
40 |
w, h = min(w+80, img.shape[1]-x), min(h+80, img.shape[0]-y)
|
41 |
return img[y:y+h, x:x+w]
|
@@ -44,16 +44,35 @@ def detect_roi(img):
|
|
44 |
logging.error(f"ROI detection failed: {str(e)}")
|
45 |
return img
|
46 |
|
47 |
-
def correct_seven_segment(text, bbox):
|
48 |
-
"""Correct common seven-segment misreads based on bounding box
|
49 |
-
if "6" in text:
|
50 |
-
#
|
51 |
(x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
text = text.replace("6", "2")
|
58 |
return text
|
59 |
|
@@ -63,14 +82,16 @@ def enhance_image(img, mode="standard"):
|
|
63 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
64 |
|
65 |
if mode == "seven_segment":
|
66 |
-
#
|
67 |
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
68 |
return thresh
|
69 |
elif mode == "minimal":
|
70 |
-
#
|
71 |
-
|
72 |
-
_, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
73 |
return thresh
|
|
|
|
|
|
|
74 |
elif mode == "high_contrast":
|
75 |
denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
|
76 |
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
@@ -84,16 +105,19 @@ def enhance_image(img, mode="standard"):
|
|
84 |
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
85 |
thresh = clahe.apply(denoised)
|
86 |
|
87 |
-
if mode not in ["seven_segment", "minimal"]:
|
88 |
thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
89 |
cv2.THRESH_BINARY, 11, 2)
|
90 |
|
91 |
-
# Morphological operations
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
94 |
|
95 |
-
# Skip sharpening for seven-segment and
|
96 |
-
if mode not in ["seven_segment", "minimal"]:
|
97 |
brightness = estimate_brightness(img)
|
98 |
sharpen_strength = 3 if brightness > 100 else 5
|
99 |
sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
|
@@ -119,19 +143,19 @@ def extract_weight_from_image(pil_img):
|
|
119 |
|
120 |
# Estimate brightness for adaptive thresholding
|
121 |
brightness = estimate_brightness(img)
|
122 |
-
conf_threshold = 0.
|
123 |
|
124 |
# Detect ROI
|
125 |
roi_img = detect_roi(img)
|
126 |
|
127 |
# Process multiple image versions
|
128 |
images_to_process = [
|
129 |
-
("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.
|
130 |
-
("minimal", enhance_image(roi_img, mode="minimal"), {'contrast_ths': 0.
|
131 |
-
("
|
132 |
-
("
|
133 |
-
("
|
134 |
-
("
|
135 |
]
|
136 |
|
137 |
best_weight = None
|
@@ -144,7 +168,7 @@ def extract_weight_from_image(pil_img):
|
|
144 |
|
145 |
for (bbox, text, conf) in results:
|
146 |
# Apply seven-segment correction
|
147 |
-
text = correct_seven_segment(text, bbox)
|
148 |
original_text = text
|
149 |
text = text.lower().strip()
|
150 |
|
@@ -166,7 +190,7 @@ def extract_weight_from_image(pil_img):
|
|
166 |
weight = float(text)
|
167 |
# Score based on realistic weight range (0.1–500 kg)
|
168 |
range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
|
169 |
-
#
|
170 |
digit_score = 1.5 if 10 <= weight < 100 else 1.0
|
171 |
score = conf * range_score * digit_score
|
172 |
if score > best_score and conf > conf_threshold:
|
|
|
21 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
22 |
# Stricter threshold for bright areas
|
23 |
brightness = estimate_brightness(img)
|
24 |
+
thresh_value = 230 if brightness > 100 else 190
|
25 |
_, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
|
26 |
# Morphological operations to connect digits
|
27 |
kernel = np.ones((9, 9), np.uint8)
|
|
|
29 |
# Find contours
|
30 |
contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
31 |
if contours:
|
32 |
+
# Filter contours by size and aspect ratio
|
33 |
valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
|
34 |
if valid_contours:
|
35 |
for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
|
36 |
x, y, w, h = cv2.boundingRect(contour)
|
37 |
aspect_ratio = w / h
|
38 |
+
if 1.5 <= aspect_ratio <= 4.0 and w > 50 and h > 30:
|
39 |
x, y = max(0, x-40), max(0, y-40)
|
40 |
w, h = min(w+80, img.shape[1]-x), min(h+80, img.shape[0]-y)
|
41 |
return img[y:y+h, x:x+w]
|
|
|
44 |
logging.error(f"ROI detection failed: {str(e)}")
|
45 |
return img
|
46 |
|
47 |
+
def correct_seven_segment(text, bbox, img):
    """Correct "2"/"6" seven-segment misreads using pixel distribution.

    The region of ``img`` covered by ``bbox`` is Otsu-thresholded and the
    number of white (255) pixels in its upper half is compared with the
    number in its lower half. If the lower half holds more than 1.5x the
    white pixels of the upper half, every "2" in ``text`` becomes "6";
    otherwise every "6" becomes "2".

    Args:
        text: Recognised string for one detection.
        bbox: Iterable of (x, y) corner points for the detection. The
            enclosing axis-aligned rectangle is used, so corner order does
            not matter.
        img: Image array the bbox coordinates refer to. A 2-D array is
            treated as already grayscale; anything else is converted with
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The (possibly corrected) text. The input text is returned unchanged
        when it contains neither "2" nor "6", when ``bbox``/``img`` are
        missing or malformed, or when the clipped box is empty.
    """
    # Nothing to correct: skip all image work.
    if "2" not in text and "6" not in text:
        return text

    # Best-effort correction: unusable inputs leave the text untouched
    # instead of raising inside the OCR loop.
    if img is None or getattr(img, "ndim", 0) < 2:
        return text
    try:
        xs = [int(point[0]) for point in bbox]
        ys = [int(point[1]) for point in bbox]
    except (TypeError, ValueError, IndexError, OverflowError):
        return text
    if not xs:
        return text

    # Clip the enclosing rectangle to the image bounds.
    img_height, img_width = img.shape[:2]
    x_min, x_max = max(0, min(xs)), min(img_width, max(xs))
    y_min, y_max = max(0, min(ys)), min(img_height, max(ys))
    if x_max <= x_min or y_max <= y_min:
        return text

    # Crop the digit area.
    digit_area = img[y_min:y_max, x_min:x_max]
    if digit_area.size == 0:
        return text

    # Convert to grayscale (unless already single-channel) and binarise.
    if digit_area.ndim == 2:
        gray = digit_area
    else:
        gray = cv2.cvtColor(digit_area, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Count white pixels above and below the horizontal midline.
    midline = binary.shape[0] // 2
    upper_pixels = np.count_nonzero(binary[:midline, :] == 255)
    lower_pixels = np.count_nonzero(binary[midline:, :] == 255)

    # Heuristic from the original author: "6" has more pixels in the lower
    # half due to the loop, "2" is more balanced.
    # NOTE(review): the measurement covers the whole bbox, yet the replace
    # rewrites every "2"/"6" in the text; multi-digit readings containing
    # both digits will lose one of them. Confirm this is intended.
    if lower_pixels > upper_pixels * 1.5:
        return text.replace("2", "6")
    return text.replace("6", "2")
|
78 |
|
|
|
82 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
83 |
|
84 |
if mode == "seven_segment":
|
85 |
+
# Extremely minimal preprocessing for seven-segment displays
|
86 |
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
87 |
return thresh
|
88 |
elif mode == "minimal":
|
89 |
+
# No blurring, just threshold
|
90 |
+
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
|
|
91 |
return thresh
|
92 |
+
elif mode == "raw":
|
93 |
+
# No preprocessing, just convert to grayscale
|
94 |
+
return gray
|
95 |
elif mode == "high_contrast":
|
96 |
denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
|
97 |
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
|
|
105 |
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
106 |
thresh = clahe.apply(denoised)
|
107 |
|
108 |
+
if mode not in ["seven_segment", "minimal", "raw"]:
|
109 |
thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
110 |
cv2.THRESH_BINARY, 11, 2)
|
111 |
|
112 |
+
# Morphological operations only for non-seven-segment modes
|
113 |
+
if mode not in ["seven_segment", "minimal", "raw"]:
|
114 |
+
kernel = np.ones((3, 3), np.uint8)
|
115 |
+
morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
|
116 |
+
else:
|
117 |
+
morphed = thresh
|
118 |
|
119 |
+
# Skip sharpening for seven-segment, minimal, and raw modes
|
120 |
+
if mode not in ["seven_segment", "minimal", "raw"]:
|
121 |
brightness = estimate_brightness(img)
|
122 |
sharpen_strength = 3 if brightness > 100 else 5
|
123 |
sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
|
|
|
143 |
|
144 |
# Estimate brightness for adaptive thresholding
|
145 |
brightness = estimate_brightness(img)
|
146 |
+
conf_threshold = 0.8 if brightness > 100 else 0.6 # Stricter for bright displays
|
147 |
|
148 |
# Detect ROI
|
149 |
roi_img = detect_roi(img)
|
150 |
|
151 |
# Process multiple image versions
|
152 |
images_to_process = [
|
153 |
+
("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
|
154 |
+
("minimal", enhance_image(roi_img, mode="minimal"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
|
155 |
+
("raw", enhance_image(roi_img, mode="raw"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
|
156 |
+
("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
|
157 |
+
("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
|
158 |
+
("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
|
159 |
]
|
160 |
|
161 |
best_weight = None
|
|
|
168 |
|
169 |
for (bbox, text, conf) in results:
|
170 |
# Apply seven-segment correction
|
171 |
+
text = correct_seven_segment(text, bbox, roi_img)
|
172 |
original_text = text
|
173 |
text = text.lower().strip()
|
174 |
|
|
|
190 |
weight = float(text)
|
191 |
# Score based on realistic weight range (0.1–500 kg)
|
192 |
range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
|
193 |
+
# Prefer two-digit weights for scales
|
194 |
digit_score = 1.5 if 10 <= weight < 100 else 1.0
|
195 |
score = conf * range_score * digit_score
|
196 |
if score > best_score and conf > conf_threshold:
|