Sanjayraju30 committed on
Commit
3137c41
·
verified ·
1 Parent(s): b613b80

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +57 -43
ocr_engine.py CHANGED
@@ -38,15 +38,39 @@ def preprocess_image(img):
38
  denoised = cv2.bilateralFilter(gray, 11, 17, 17)
39
  save_debug_image(denoised, "01_preprocess_bilateral")
40
  # Enhance contrast using CLAHE
41
- clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
42
  enhanced = clahe.apply(denoised)
43
  save_debug_image(enhanced, "02_preprocess_clahe")
44
- return enhanced
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  def detect_roi(img):
47
  """Detect and crop the region of interest (likely the digital display)."""
48
  try:
49
- save_debug_image(img, "03_original")
50
  preprocessed = preprocess_image(img)
51
  brightness_map = cv2.GaussianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (15, 15), 0)
52
 
@@ -56,13 +80,13 @@ def detect_roi(img):
56
  cv2.THRESH_BINARY_INV, block_size, 5)
57
  _, otsu_thresh = cv2.threshold(preprocessed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
58
  combined_thresh = cv2.bitwise_and(thresh, otsu_thresh)
59
- save_debug_image(combined_thresh, "04_roi_combined_threshold")
60
 
61
  # Morphological operations to connect digits
62
  kernel = np.ones((5, 5), np.uint8)
63
  dilated = cv2.dilate(combined_thresh, kernel, iterations=2)
64
  eroded = cv2.erode(dilated, kernel, iterations=1)
65
- save_debug_image(eroded, "05_roi_morphological")
66
 
67
  contours, _ = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
68
 
@@ -74,49 +98,49 @@ def detect_roi(img):
74
  x, y, w, h = cv2.boundingRect(c)
75
  roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
76
  aspect_ratio = w / h
77
- if (1000 < area < (img_area * 0.9) and
78
- 1.0 <= aspect_ratio <= 10.0 and w > 80 and h > 40 and roi_brightness > 100):
79
  valid_contours.append((c, roi_brightness))
80
  logging.debug(f"Contour: Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
81
 
82
  if valid_contours:
83
  contour, _ = max(valid_contours, key=lambda x: x[1]) # Max brightness
84
  x, y, w, h = cv2.boundingRect(contour)
85
- padding = 80
86
  x, y = max(0, x - padding), max(0, y - padding)
87
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
88
  roi_img = img[y:y+h, x:x+w]
89
- save_debug_image(roi_img, "06_detected_roi")
90
  logging.info(f"Detected ROI with dimensions: ({x}, {y}, {w}, {h})")
91
  return roi_img, (x, y, w, h)
92
 
93
  logging.info("No suitable ROI found, attempting fallback criteria.")
94
  # Fallback with relaxed criteria
95
- valid_contours = [c for c in contours if 500 < cv2.contourArea(c) < (img_area * 0.95) and
96
- 0.8 <= cv2.boundingRect(c)[2]/cv2.boundingRect(c)[3] <= 12.0]
97
  if valid_contours:
98
  contour = max(valid_contours, key=cv2.contourArea)
99
  x, y, w, h = cv2.boundingRect(contour)
100
- padding = 80
101
  x, y = max(0, x - padding), max(0, y - padding)
102
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
103
  roi_img = img[y:y+h, x:x+w]
104
- save_debug_image(roi_img, "06_detected_roi_fallback")
105
  logging.info(f"Detected fallback ROI with dimensions: ({x}, {y}, {w}, {h})")
106
  return roi_img, (x, y, w, h)
107
 
108
  logging.info("No suitable ROI found, returning original image.")
109
- save_debug_image(img, "06_no_roi_original_fallback")
110
  return img, None
111
  except Exception as e:
112
  logging.error(f"ROI detection failed: {str(e)}")
113
- save_debug_image(img, "06_roi_detection_error_fallback")
114
  return img, None
115
 
116
  def detect_segments(digit_img, brightness):
117
  """Detect seven-segment patterns in a digit image."""
118
  h, w = digit_img.shape
119
- if h < 20 or w < 15:
120
  return None
121
 
122
  segments = {
@@ -139,7 +163,7 @@ def detect_segments(digit_img, brightness):
139
  continue
140
  pixel_count = np.sum(region == 255)
141
  total_pixels = region.size
142
- segment_presence[name] = pixel_count / total_pixels > (0.3 if brightness < 100 else 0.5)
143
 
144
  digit_patterns = {
145
  '0': ('top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
@@ -159,8 +183,8 @@ def detect_segments(digit_img, brightness):
159
  for digit, pattern in digit_patterns.items():
160
  matches = sum(1 for segment in pattern if segment_presence.get(segment, False))
161
  non_matches_penalty = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
162
- score = matches - 0.3 * non_matches_penalty
163
- if matches >= len(pattern) * 0.8:
164
  score += 1.0
165
  if score > max_score:
166
  max_score = score
@@ -176,12 +200,12 @@ def custom_seven_segment_ocr(img, roi_bbox):
176
  brightness = estimate_brightness(img)
177
  thresh_value = 100 if brightness < 100 else 0
178
  _, thresh = cv2.threshold(preprocessed, thresh_value, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
179
- save_debug_image(thresh, "07_roi_thresh_for_digits")
180
 
181
  # Morphological operations to enhance digit segments
182
  kernel = np.ones((3, 3), np.uint8)
183
  thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
184
- save_debug_image(thresh, "08_morph_closed")
185
 
186
  batch_size = max(4, min(16, int(img.shape[0] * img.shape[1] / 100000)))
187
  results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
@@ -198,7 +222,7 @@ def custom_seven_segment_ocr(img, roi_bbox):
198
  for (bbox, text, conf) in results:
199
  (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
200
  h_bbox = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
201
- if len(text) == 1 and (text.isdigit() or text == '.') and h_bbox > 10:
202
  x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
203
  y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
204
  digits_info.append((x_min, x_max, y_min, y_max, text, conf))
@@ -211,7 +235,7 @@ def custom_seven_segment_ocr(img, roi_bbox):
211
  if x_max <= x_min or y_max <= y_min:
212
  continue
213
  digit_img_crop = thresh[y_min:y_max, x_min:x_max]
214
- save_debug_image(digit_img_crop, f"09_digit_crop_{idx}_{easyocr_char}")
215
  if easyocr_conf > 0.95 or easyocr_char == '.':
216
  recognized_text += easyocr_char
217
  else:
@@ -240,20 +264,12 @@ def extract_weight_from_image(pil_img):
240
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
241
  save_debug_image(img, "00_input_image")
242
 
243
- # Rotation correction
244
- edges = cv2.Canny(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 100, 200)
245
- lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100, minLineLength=100, maxLineGap=10)
246
- if lines is not None:
247
- angle = np.mean([np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines])
248
- if abs(angle) > 5:
249
- (h, w) = img.shape[:2]
250
- center = (w // 2, h // 2)
251
- M = cv2.getRotationMatrix2D(center, angle, 1.0)
252
- img = cv2.warpAffine(img, M, (w, h))
253
- save_debug_image(img, "00_rotated_image")
254
 
255
  brightness = estimate_brightness(img)
256
  conf_threshold = 0.7 if brightness > 150 else (0.6 if brightness > 80 else 0.4)
 
257
  roi_img, roi_bbox = detect_roi(img)
258
  if roi_bbox:
259
  roi_area = roi_bbox[2] * roi_bbox[3]
@@ -263,7 +279,7 @@ def extract_weight_from_image(pil_img):
263
  if custom_result:
264
  try:
265
  weight = float(custom_result)
266
- if 0.01 <= weight <= 500:
267
  logging.info(f"Custom OCR result: {custom_result}, Confidence: 95.0%")
268
  return custom_result, 95.0
269
  else:
@@ -273,11 +289,8 @@ def extract_weight_from_image(pil_img):
273
 
274
  logging.info("Custom OCR failed or invalid, falling back to enhanced EasyOCR.")
275
  preprocessed_roi = preprocess_image(roi_img)
276
- kernel_sharpening = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
277
- sharpened_roi = cv2.filter2D(preprocessed_roi, -1, kernel_sharpening)
278
- save_debug_image(sharpened_roi, "10_fallback_sharpened")
279
  block_size = max(11, min(31, int(roi_img.shape[0] / 20) * 2 + 1))
280
- final_roi = cv2.adaptiveThreshold(sharpened_roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
281
  cv2.THRESH_BINARY_INV, block_size, 8)
282
  save_debug_image(final_roi, "11_fallback_adaptive_thresh")
283
 
@@ -312,9 +325,9 @@ def extract_weight_from_image(pil_img):
312
  weight /= 1000 # Convert grams to kilograms
313
  elif unit == 'lb':
314
  weight *= 0.453592 # Convert pounds to kilograms
315
- range_score = 1.5 if 0.01 <= weight <= 500 else 0.8
316
  digit_count = len(text.replace('.', ''))
317
- digit_score = 1.3 if 2 <= digit_count <= 6 else 0.9
318
  score = conf * range_score * digit_score
319
  if roi_bbox:
320
  (x_roi, y_roi, w_roi, h_roi) = roi_bbox
@@ -331,6 +344,7 @@ def extract_weight_from_image(pil_img):
331
  logging.info(f"Candidate EasyOCR weight: '{text}', Unit: {unit or 'none'}, Conf: {conf}, Score: {score}")
332
  except ValueError:
333
  logging.warning(f"Could not convert '{text}' to float during EasyOCR fallback.")
 
334
 
335
  if not best_weight:
336
  logging.info("No valid weight detected after all attempts.")
@@ -347,12 +361,12 @@ def extract_weight_from_image(pil_img):
347
 
348
  try:
349
  final_weight = float(best_weight)
350
- if final_weight < 0.01 or final_weight > 500:
351
  best_conf *= 0.7
352
  except ValueError:
353
  pass
354
 
355
- logging.info(f"Final detected weight: {best_weight}, Unit: {unit or 'none'}, Confidence: {round(best_conf * 100, 2)}%")
356
  return best_weight, round(best_conf * 100, 2)
357
 
358
  except Exception as e:
 
38
  denoised = cv2.bilateralFilter(gray, 11, 17, 17)
39
  save_debug_image(denoised, "01_preprocess_bilateral")
40
  # Enhance contrast using CLAHE
41
+ clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8))
42
  enhanced = clahe.apply(denoised)
43
  save_debug_image(enhanced, "02_preprocess_clahe")
44
+ # Sharpen the image
45
+ kernel_sharpening = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
46
+ sharpened = cv2.filter2D(enhanced, -1, kernel_sharpening)
47
+ save_debug_image(sharpened, "03_preprocess_sharpened")
48
+ return sharpened
49
+
50
def correct_rotation(img):
    """Deskew an image using the dominant direction of its Hough line segments.

    The image is converted with ``cv2.COLOR_BGR2GRAY``, so a 3-channel BGR
    array is expected. Edges are found with Canny, line segments with the
    probabilistic Hough transform, and the median skew of those segments is
    used as the correction angle.

    Each segment angle is folded to its deviation from the nearest image
    axis, i.e. into [-45, 45) degrees. Without this, the vertical edges of a
    rectangular display (about +/-90 degrees) and segments reported with
    reversed endpoints (about +/-180 degrees) are mixed into the median and
    can turn an upright image sideways or upside down.

    Args:
        img: Input image array (BGR).

    Returns:
        The rotated image when the estimated skew exceeds 5 degrees,
        otherwise ``img`` unchanged. On any internal failure the error is
        logged and ``img`` is returned unchanged (best-effort step).
    """
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 100, 200)
        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100,
                                minLineLength=100, maxLineGap=10)
        # HoughLinesP yields None when nothing is found; also guard against
        # an empty result so the median below is never taken over no data.
        if lines is None or len(lines) == 0:
            return img

        # Each detected segment is (x1, y1, x2, y2); flatten to an (N, 4) array.
        segments = np.asarray(lines, dtype=np.float64).reshape(-1, 4)
        dx = segments[:, 2] - segments[:, 0]
        dy = segments[:, 3] - segments[:, 1]
        raw_angles = np.degrees(np.arctan2(dy, dx))

        # Fold to the deviation from the nearest axis so horizontal and
        # vertical edges of the same skewed rectangle agree on one angle.
        skew_angles = (raw_angles + 45.0) % 90.0 - 45.0
        angle = float(np.median(skew_angles))  # Median is robust to outliers

        # Small skews are left alone to avoid needless resampling.
        if abs(angle) > 5:
            (h, w) = img.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, angle, 1.0)
            img = cv2.warpAffine(img, M, (w, h))
            save_debug_image(img, "00_rotated_image")
            logging.info(f"Applied rotation correction: {angle:.2f} degrees")
        return img
    except Exception as e:
        # Rotation correction is optional; never let it abort the OCR pipeline.
        logging.error(f"Rotation correction failed: {str(e)}")
        return img
69
 
70
  def detect_roi(img):
71
  """Detect and crop the region of interest (likely the digital display)."""
72
  try:
73
+ save_debug_image(img, "04_original")
74
  preprocessed = preprocess_image(img)
75
  brightness_map = cv2.GaussianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (15, 15), 0)
76
 
 
80
  cv2.THRESH_BINARY_INV, block_size, 5)
81
  _, otsu_thresh = cv2.threshold(preprocessed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
82
  combined_thresh = cv2.bitwise_and(thresh, otsu_thresh)
83
+ save_debug_image(combined_thresh, "05_roi_combined_threshold")
84
 
85
  # Morphological operations to connect digits
86
  kernel = np.ones((5, 5), np.uint8)
87
  dilated = cv2.dilate(combined_thresh, kernel, iterations=2)
88
  eroded = cv2.erode(dilated, kernel, iterations=1)
89
+ save_debug_image(eroded, "06_roi_morphological")
90
 
91
  contours, _ = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
92
 
 
98
  x, y, w, h = cv2.boundingRect(c)
99
  roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
100
  aspect_ratio = w / h
101
+ if (500 < area < (img_area * 0.9) and
102
+ 0.8 <= aspect_ratio <= 12.0 and w > 60 and h > 30 and roi_brightness > 80):
103
  valid_contours.append((c, roi_brightness))
104
  logging.debug(f"Contour: Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
105
 
106
  if valid_contours:
107
  contour, _ = max(valid_contours, key=lambda x: x[1]) # Max brightness
108
  x, y, w, h = cv2.boundingRect(contour)
109
+ padding = 100
110
  x, y = max(0, x - padding), max(0, y - padding)
111
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
112
  roi_img = img[y:y+h, x:x+w]
113
+ save_debug_image(roi_img, "07_detected_roi")
114
  logging.info(f"Detected ROI with dimensions: ({x}, {y}, {w}, {h})")
115
  return roi_img, (x, y, w, h)
116
 
117
  logging.info("No suitable ROI found, attempting fallback criteria.")
118
  # Fallback with relaxed criteria
119
+ valid_contours = [c for c in contours if 300 < cv2.contourArea(c) < (img_area * 0.95) and
120
+ 0.5 <= cv2.boundingRect(c)[2]/cv2.boundingRect(c)[3] <= 15.0]
121
  if valid_contours:
122
  contour = max(valid_contours, key=cv2.contourArea)
123
  x, y, w, h = cv2.boundingRect(contour)
124
+ padding = 100
125
  x, y = max(0, x - padding), max(0, y - padding)
126
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
127
  roi_img = img[y:y+h, x:x+w]
128
+ save_debug_image(roi_img, "07_detected_roi_fallback")
129
  logging.info(f"Detected fallback ROI with dimensions: ({x}, {y}, {w}, {h})")
130
  return roi_img, (x, y, w, h)
131
 
132
  logging.info("No suitable ROI found, returning original image.")
133
+ save_debug_image(img, "07_no_roi_original_fallback")
134
  return img, None
135
  except Exception as e:
136
  logging.error(f"ROI detection failed: {str(e)}")
137
+ save_debug_image(img, "07_roi_detection_error_fallback")
138
  return img, None
139
 
140
  def detect_segments(digit_img, brightness):
141
  """Detect seven-segment patterns in a digit image."""
142
  h, w = digit_img.shape
143
+ if h < 15 or w < 10:
144
  return None
145
 
146
  segments = {
 
163
  continue
164
  pixel_count = np.sum(region == 255)
165
  total_pixels = region.size
166
+ segment_presence[name] = pixel_count / total_pixels > (0.25 if brightness < 100 else 0.45)
167
 
168
  digit_patterns = {
169
  '0': ('top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
 
183
  for digit, pattern in digit_patterns.items():
184
  matches = sum(1 for segment in pattern if segment_presence.get(segment, False))
185
  non_matches_penalty = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
186
+ score = matches - 0.2 * non_matches_penalty
187
+ if matches >= len(pattern) * 0.75:
188
  score += 1.0
189
  if score > max_score:
190
  max_score = score
 
200
  brightness = estimate_brightness(img)
201
  thresh_value = 100 if brightness < 100 else 0
202
  _, thresh = cv2.threshold(preprocessed, thresh_value, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
203
+ save_debug_image(thresh, "08_roi_thresh_for_digits")
204
 
205
  # Morphological operations to enhance digit segments
206
  kernel = np.ones((3, 3), np.uint8)
207
  thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
208
+ save_debug_image(thresh, "09_morph_closed")
209
 
210
  batch_size = max(4, min(16, int(img.shape[0] * img.shape[1] / 100000)))
211
  results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
 
222
  for (bbox, text, conf) in results:
223
  (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
224
  h_bbox = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
225
+ if len(text) == 1 and (text.isdigit() or text == '.') and h_bbox > 8:
226
  x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
227
  y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
228
  digits_info.append((x_min, x_max, y_min, y_max, text, conf))
 
235
  if x_max <= x_min or y_max <= y_min:
236
  continue
237
  digit_img_crop = thresh[y_min:y_max, x_min:x_max]
238
+ save_debug_image(digit_img_crop, f"10_digit_crop_{idx}_{easyocr_char}")
239
  if easyocr_conf > 0.95 or easyocr_char == '.':
240
  recognized_text += easyocr_char
241
  else:
 
264
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
265
  save_debug_image(img, "00_input_image")
266
 
267
+ # Apply rotation correction
268
+ img = correct_rotation(img)
 
 
 
 
 
 
 
 
 
269
 
270
  brightness = estimate_brightness(img)
271
  conf_threshold = 0.7 if brightness > 150 else (0.6 if brightness > 80 else 0.4)
272
+
273
  roi_img, roi_bbox = detect_roi(img)
274
  if roi_bbox:
275
  roi_area = roi_bbox[2] * roi_bbox[3]
 
279
  if custom_result:
280
  try:
281
  weight = float(custom_result)
282
+ if 0.001 <= weight <= 1000:
283
  logging.info(f"Custom OCR result: {custom_result}, Confidence: 95.0%")
284
  return custom_result, 95.0
285
  else:
 
289
 
290
  logging.info("Custom OCR failed or invalid, falling back to enhanced EasyOCR.")
291
  preprocessed_roi = preprocess_image(roi_img)
 
 
 
292
  block_size = max(11, min(31, int(roi_img.shape[0] / 20) * 2 + 1))
293
+ final_roi = cv2.adaptiveThreshold(preprocessed_roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
294
  cv2.THRESH_BINARY_INV, block_size, 8)
295
  save_debug_image(final_roi, "11_fallback_adaptive_thresh")
296
 
 
325
  weight /= 1000 # Convert grams to kilograms
326
  elif unit == 'lb':
327
  weight *= 0.453592 # Convert pounds to kilograms
328
+ range_score = 1.5 if 0.001 <= weight <= 1000 else 0.8
329
  digit_count = len(text.replace('.', ''))
330
+ digit_score = 1.3 if 2 <= digit_count <= 7 else 0.9
331
  score = conf * range_score * digit_score
332
  if roi_bbox:
333
  (x_roi, y_roi, w_roi, h_roi) = roi_bbox
 
344
  logging.info(f"Candidate EasyOCR weight: '{text}', Unit: {unit or 'none'}, Conf: {conf}, Score: {score}")
345
  except ValueError:
346
  logging.warning(f"Could not convert '{text}' to float during EasyOCR fallback.")
347
+ continue
348
 
349
  if not best_weight:
350
  logging.info("No valid weight detected after all attempts.")
 
361
 
362
  try:
363
  final_weight = float(best_weight)
364
+ if final_weight < 0.001 or final_weight > 1000:
365
  best_conf *= 0.7
366
  except ValueError:
367
  pass
368
 
369
+ logging.info(f"Final detected weight: {best_weight} kg, Confidence: {round(best_conf * 100, 2)}%")
370
  return best_weight, round(best_conf * 100, 2)
371
 
372
  except Exception as e: