Sanjayraju30 committed on
Commit
8254c9e
·
verified ·
1 Parent(s): 554a2ee

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +155 -230
ocr_engine.py CHANGED
@@ -6,7 +6,7 @@ import logging
6
  from datetime import datetime
7
  import os
8
 
9
- # Set up logging for debugging
10
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
11
 
12
  # Initialize EasyOCR
@@ -17,147 +17,98 @@ DEBUG_DIR = "debug_images"
17
  os.makedirs(DEBUG_DIR, exist_ok=True)
18
 
19
  def save_debug_image(img, filename_suffix, prefix=""):
20
- """Saves an image to the debug directory with a timestamp."""
21
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
22
  filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
23
- if len(img.shape) == 3: # Color image
24
  cv2.imwrite(filename, img)
25
- else: # Grayscale image
26
  cv2.imwrite(filename, img)
27
  logging.info(f"Saved debug image: {filename}")
28
 
29
  def estimate_brightness(img):
30
- """Estimate image brightness to detect illuminated displays."""
31
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
32
  return np.mean(gray)
33
 
34
- def preprocess_image(img, scale=1.0, method='clahe'):
35
- """Preprocess image for better OCR accuracy."""
36
- if scale != 1.0:
37
- img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
38
- save_debug_image(img, f"01_preprocess_scaled_{scale}")
39
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
40
- # Gentle denoising
41
- denoised = cv2.bilateralFilter(gray, 7, 10, 10)
42
- save_debug_image(denoised, "02_preprocess_bilateral")
43
- # Enhance contrast
44
- if method == 'clahe':
45
- clahe = cv2.createCLAHE(clipLimit=3.5, tileGridSize=(8, 8))
46
- enhanced = clahe.apply(denoised)
47
- else: # Histogram equalization
48
- enhanced = cv2.equalizeHist(denoised)
49
- save_debug_image(enhanced, f"03_preprocess_{method}")
50
- # Sharpen
51
- kernel_sharpening = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
52
- sharpened = cv2.filter2D(enhanced, -1, kernel_sharpening)
53
- save_debug_image(sharpened, "04_preprocess_sharpened")
54
- return sharpened
55
 
56
  def correct_rotation(img):
57
- """Correct image rotation using Hough Transform."""
58
  try:
59
  edges = cv2.Canny(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 50, 150)
60
- lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=50, minLineLength=40, maxLineGap=10)
61
  if lines is not None:
62
  angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
63
  angle = np.median(angles)
64
  if abs(angle) > 2:
65
- (h, w) = img.shape[:2]
66
  center = (w // 2, h // 2)
67
  M = cv2.getRotationMatrix2D(center, angle, 1.0)
68
  img = cv2.warpAffine(img, M, (w, h))
69
  save_debug_image(img, "00_rotated_image")
70
- logging.info(f"Applied rotation correction: {angle:.2f} degrees")
71
  return img
72
  except Exception as e:
73
  logging.error(f"Rotation correction failed: {str(e)}")
74
  return img
75
 
76
  def detect_roi(img):
77
- """Detect and crop the region of interest (likely the digital display)."""
78
  try:
79
- save_debug_image(img, "05_original")
80
- brightness_map = cv2.GaussianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (15, 15), 0)
81
-
82
- # Try multiple scales and methods
83
- scales = [1.0, 1.5, 0.5]
84
- methods = ['clahe', 'hist']
85
- for scale in scales:
86
- for method in methods:
87
- preprocessed = preprocess_image(img, scale, method)
88
- block_size = max(9, min(31, int(img.shape[0] / 25) * 2 + 1))
89
- thresh = cv2.adaptiveThreshold(preprocessed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
90
- cv2.THRESH_BINARY_INV, block_size, 3)
91
- _, otsu_thresh = cv2.threshold(preprocessed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
92
- combined_thresh = cv2.bitwise_and(thresh, otsu_thresh)
93
- save_debug_image(combined_thresh, f"06_roi_combined_threshold_scale_{scale}_{method}")
94
-
95
- # Morphological operations
96
- kernel = np.ones((3, 3), np.uint8)
97
- dilated = cv2.dilate(combined_thresh, kernel, iterations=2)
98
- eroded = cv2.erode(dilated, kernel, iterations=1)
99
- save_debug_image(eroded, f"07_roi_morphological_scale_{scale}_{method}")
100
-
101
- contours, _ = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
102
-
103
- if contours:
104
- img_area = img.shape[0] * img.shape[1]
105
- valid_contours = []
106
- for c in contours:
107
- area = cv2.contourArea(c)
108
- x, y, w, h = cv2.boundingRect(c)
109
- roi_brightness = np.mean(brightness_map[y:y+h, x:x+w] if scale == 1.0 else cv2.resize(brightness_map, (img.shape[1], img.shape[0])))
110
- aspect_ratio = w / h
111
- if (100 < area < (img_area * 0.95) and
112
- 0.3 <= aspect_ratio <= 20.0 and w > 40 and h > 15 and roi_brightness > 50):
113
- valid_contours.append((c, roi_brightness))
114
- logging.debug(f"Contour: Scale={scale}, Method={method}, Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
115
-
116
- if valid_contours:
117
- contour, _ = max(valid_contours, key=lambda x: x[1])
118
- x, y, w, h = cv2.boundingRect(contour)
119
- if scale != 1.0:
120
- x, y, w, h = [int(v / scale) for v in (x, y, w, h)]
121
- padding = 150
122
- x, y = max(0, x - padding), max(0, y - padding)
123
- w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
124
- roi_img = img[y:y+h, x:x+w]
125
- save_debug_image(roi_img, f"08_detected_roi_scale_{scale}_{method}")
126
- logging.info(f"Detected ROI with dimensions: ({x}, {y}, {w}, {h}) at scale {scale}, method {method}")
127
- return roi_img, (x, y, w, h)
128
-
129
- logging.info("No suitable ROI found, attempting fallback criteria.")
130
- # Fallback with relaxed criteria
131
- preprocessed = preprocess_image(img, method='clahe')
132
  thresh = cv2.adaptiveThreshold(preprocessed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
133
- cv2.THRESH_BINARY_INV, block_size, 5)
134
- save_debug_image(thresh, "06_roi_fallback_threshold")
135
  contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
136
- valid_contours = [c for c in contours if 50 < cv2.contourArea(c) < (img.shape[0] * img.shape[1] * 0.95) and
137
- 0.2 <= cv2.boundingRect(c)[2]/cv2.boundingRect(c)[3] <= 25.0]
138
- if valid_contours:
139
- contour = max(valid_contours, key=cv2.contourArea)
140
- x, y, w, h = cv2.boundingRect(contour)
141
- padding = 150
142
- x, y = max(0, x - padding), max(0, y - padding)
143
- w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
144
- roi_img = img[y:y+h, x:x+w]
145
- save_debug_image(roi_img, "08_detected_roi_fallback")
146
- logging.info(f"Detected fallback ROI with dimensions: ({x}, {y}, {w}, {h})")
147
- return roi_img, (x, y, w, h)
148
 
149
- logging.info("No suitable ROI found, returning original image.")
150
- save_debug_image(img, "08_no_roi_original_fallback")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  return img, None
152
  except Exception as e:
153
  logging.error(f"ROI detection failed: {str(e)}")
154
- save_debug_image(img, "08_roi_detection_error_fallback")
155
  return img, None
156
 
157
  def detect_segments(digit_img, brightness):
158
- """Detect seven-segment patterns in a digit image."""
159
  h, w = digit_img.shape
160
- if h < 8 or w < 6:
161
  return None
162
 
163
  segments = {
@@ -180,7 +131,7 @@ def detect_segments(digit_img, brightness):
180
  continue
181
  pixel_count = np.sum(region == 255)
182
  total_pixels = region.size
183
- segment_presence[name] = pixel_count / total_pixels > (0.15 if brightness < 80 else 0.35)
184
 
185
  digit_patterns = {
186
  '0': ('top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
@@ -200,46 +151,38 @@ def detect_segments(digit_img, brightness):
200
  for digit, pattern in digit_patterns.items():
201
  matches = sum(1 for segment in pattern if segment_presence.get(segment, False))
202
  non_matches_penalty = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
203
- score = matches - 0.15 * non_matches_penalty
204
- if matches >= len(pattern) * 0.65:
205
  score += 1.0
206
  if score > max_score:
207
  max_score = score
208
  best_match = digit
209
 
210
- logging.debug(f"Segment presence: {segment_presence}, Detected digit: {best_match}")
211
  return best_match
212
 
213
  def custom_seven_segment_ocr(img, roi_bbox):
214
- """Perform custom OCR for seven-segment displays."""
215
  try:
216
- preprocessed = preprocess_image(img, method='clahe')
217
  brightness = estimate_brightness(img)
218
- thresh_value = 60 if brightness < 80 else 0
219
- _, thresh = cv2.threshold(preprocessed, thresh_value, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
220
- save_debug_image(thresh, "09_roi_thresh_for_digits")
221
-
222
- # Morphological operations
223
- kernel = np.ones((3, 3), np.uint8)
224
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
225
- save_debug_image(thresh, "10_morph_closed")
226
-
227
- batch_size = max(4, min(16, int(img.shape[0] * img.shape[1] / 100000)))
228
  results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
229
- contrast_ths=0.1, adjust_contrast=1.3,
230
- text_threshold=0.3, mag_ratio=6.0,
231
- allowlist='0123456789.', batch_size=batch_size, y_ths=0.4)
232
 
233
- logging.info(f"EasyOCR results (seven-segment): {results}")
234
  if not results:
235
- logging.info("EasyOCR found no digits in seven-segment OCR.")
236
  return None
237
 
238
  digits_info = []
239
  for (bbox, text, conf) in results:
240
  (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
241
  h_bbox = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
242
- if (text.isdigit() or text == '.') and h_bbox > 5:
243
  x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
244
  y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
245
  digits_info.append((x_min, x_max, y_min, y_max, text, conf))
@@ -252,14 +195,14 @@ def custom_seven_segment_ocr(img, roi_bbox):
252
  if x_max <= x_min or y_max <= y_min:
253
  continue
254
  digit_img_crop = thresh[y_min:y_max, x_min:x_max]
255
- save_debug_image(digit_img_crop, f"11_digit_crop_{idx}_{easyocr_char}")
256
- if easyocr_conf > 0.85 or easyocr_char == '.':
257
  recognized_text += easyocr_char
258
  else:
259
  digit_from_segments = detect_segments(digit_img_crop, brightness)
260
  recognized_text += digit_from_segments if digit_from_segments else easyocr_char
261
 
262
- logging.info(f"Before validation, recognized_text: {recognized_text}")
263
  text = re.sub(r"[^\d\.]", "", recognized_text)
264
  if text.count('.') > 1:
265
  text = text.replace('.', '', text.count('.') - 1)
@@ -268,122 +211,108 @@ def custom_seven_segment_ocr(img, roi_bbox):
268
  if text == '':
269
  return None
270
  return text.lstrip('0') or '0'
271
- logging.info(f"Custom OCR text '{recognized_text}' failed validation.")
272
  return None
273
  except Exception as e:
274
- logging.error(f"Custom seven-segment OCR failed: {str(e)}")
275
  return None
276
 
277
  def extract_weight_from_image(pil_img):
278
- """Extract weight from a PIL image of a digital scale display."""
279
  try:
280
  img = np.array(pil_img)
281
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
282
  save_debug_image(img, "00_input_image")
283
-
284
- # Apply rotation correction
285
  img = correct_rotation(img)
286
-
287
  brightness = estimate_brightness(img)
288
- conf_threshold = 0.65 if brightness > 150 else (0.45 if brightness > 80 else 0.25)
289
 
290
  roi_img, roi_bbox = detect_roi(img)
291
  if roi_bbox:
292
- roi_area = roi_bbox[2] * roi_bbox[3]
293
- conf_threshold *= 1.1 if roi_area > (img.shape[0] * img.shape[1] * 0.5) else 1.0
294
 
295
  custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
296
  if custom_result and custom_result != '0':
297
  try:
298
  weight = float(custom_result)
299
- if 0.0001 <= weight <= 5000:
300
- logging.info(f"Custom OCR result: {custom_result}, Confidence: 95.0%")
301
- return custom_result, 95.0
302
- else:
303
- logging.warning(f"Custom OCR result {custom_result} outside typical weight range.")
304
  except ValueError:
305
- logging.warning(f"Custom OCR result '{custom_result}' is not a valid number.")
306
 
307
- logging.info("Custom OCR failed or invalid, falling back to enhanced EasyOCR.")
308
- preprocessed_roi = preprocess_image(roi_img, method='hist')
309
- block_size = max(9, min(31, int(roi_img.shape[0] / 25) * 2 + 1))
310
  final_roi = cv2.adaptiveThreshold(preprocessed_roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
311
- cv2.THRESH_BINARY_INV, block_size, 5)
312
- save_debug_image(final_roi, "12_fallback_adaptive_thresh")
313
 
314
- batch_size = max(4, min(16, int(roi_img.shape[0] * roi_img.shape[1] / 100000)))
315
- ocr_passes = [
316
- {'contrast_ths': 0.2, 'text_threshold': 0.3, 'mag_ratio': 6.0, 'y_ths': 0.4, 'label': 'first'},
317
- {'contrast_ths': 0.1, 'text_threshold': 0.2, 'mag_ratio': 7.0, 'y_ths': 0.5, 'label': 'second'},
318
- {'contrast_ths': 0.05, 'text_threshold': 0.1, 'mag_ratio': 8.0, 'y_ths': 0.6, 'label': 'third'}
319
- ]
320
- candidates = []
321
 
322
- for ocr_pass in ocr_passes:
 
323
  results = easyocr_reader.readtext(final_roi, detail=1, paragraph=False,
324
- contrast_ths=ocr_pass['contrast_ths'],
325
- adjust_contrast=1.4,
326
- text_threshold=ocr_pass['text_threshold'],
327
- mag_ratio=ocr_pass['mag_ratio'],
328
- allowlist='0123456789. kglb',
329
- batch_size=batch_size,
330
- y_ths=ocr_pass['y_ths'])
331
- logging.info(f"EasyOCR results ({ocr_pass['label']} pass): {results}")
332
- save_debug_image(final_roi, f"12_fallback_adaptive_thresh_{ocr_pass['label']}_pass")
333
-
334
- unit = None
335
- for (bbox, text, conf) in results:
336
- if 'kg' in text.lower():
337
- unit = 'kg'
338
- continue
339
- elif 'g' in text.lower():
340
- unit = 'g'
341
- continue
342
- elif 'lb' in text.lower():
343
- unit = 'lb'
344
- continue
345
- text = re.sub(r"[^\d\.]", "", text)
346
- if text.count('.') > 1:
347
- text = text.replace('.', '', text.count('.') - 1)
348
- text = text.strip('.')
349
- if re.fullmatch(r"^\d*\.?\d*$", text):
350
- try:
351
- weight = float(text)
352
- if unit == 'g':
353
- weight /= 1000
354
- elif unit == 'lb':
355
- weight *= 0.453592
356
- range_score = 1.5 if 0.0001 <= weight <= 5000 else 0.6
357
- digit_count = len(text.replace('.', ''))
358
- digit_score = 1.4 if 1 <= digit_count <= 8 else 0.7
359
- score = conf * range_score * digit_score
360
- if roi_bbox:
361
- (x_roi, y_roi, w_roi, h_roi) = roi_bbox
362
- roi_area = w_roi * h_roi
363
- x_min, y_min = int(min(b[0] for b in bbox)), int(min(b[1] for b in bbox))
364
- x_max, y_max = int(max(b[0] for b in bbox)), int(max(b[1] for b in bbox))
365
- bbox_area = (x_max - x_min) * (y_max - y_min)
366
- if roi_area > 0 and bbox_area / roi_area < 0.02:
367
- score *= 0.4
368
- candidates.append((text, conf, score, unit))
369
- logging.info(f"Candidate EasyOCR weight: '{text}', Unit: {unit or 'none'}, Conf: {conf}, Score: {score}")
370
- except ValueError:
371
- logging.warning(f"Could not convert '{text}' to float during EasyOCR fallback.")
372
 
373
- # Fallback to full image if no candidates
374
- if not candidates:
375
- logging.info("No candidates from ROI, trying full image.")
376
- preprocessed_full = preprocess_image(img, method='hist')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  final_full = cv2.adaptiveThreshold(preprocessed_full, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
378
- cv2.THRESH_BINARY_INV, block_size, 5)
379
- save_debug_image(final_full, "12_fallback_full_image")
380
  results = easyocr_reader.readtext(final_full, detail=1, paragraph=False,
381
- contrast_ths=0.1, adjust_contrast=1.5,
382
- text_threshold=0.2, mag_ratio=7.0,
383
- allowlist='0123456789. kglb', batch_size=batch_size, y_ths=0.5)
384
- logging.info(f"EasyOCR results (full image): {results}")
385
-
386
- unit = None
387
  for (bbox, text, conf) in results:
388
  if 'kg' in text.lower():
389
  unit = 'kg'
@@ -405,23 +334,20 @@ def extract_weight_from_image(pil_img):
405
  weight /= 1000
406
  elif unit == 'lb':
407
  weight *= 0.453592
408
- range_score = 1.2 if 0.0001 <= weight <= 5000 else 0.5
409
  digit_count = len(text.replace('.', ''))
410
- digit_score = 1.2 if 1 <= digit_count <= 8 else 0.6
411
- score = conf * range_score * digit_score * 0.8 # Penalty for full image
412
  candidates.append((text, conf, score, unit))
413
- logging.info(f"Candidate EasyOCR weight (full image): '{text}', Unit: {unit or 'none'}, Conf: {conf}, Score: {score}")
414
  except ValueError:
415
- logging.warning(f"Could not convert '{text}' to float during full image fallback.")
416
 
417
  if not candidates:
418
- logging.info("No valid weight detected after all attempts.")
419
  return "Not detected", 0.0
420
 
421
- # Select best candidate
422
  best_weight, best_conf, best_score, best_unit = max(candidates, key=lambda x: x[2])
423
-
424
- # Format the weight
425
  if "." in best_weight:
426
  int_part, dec_part = best_weight.split(".")
427
  int_part = int_part.lstrip("0") or "0"
@@ -432,16 +358,15 @@ def extract_weight_from_image(pil_img):
432
 
433
  try:
434
  final_weight = float(best_weight)
435
- if final_weight < 0.0001 or final_weight > 5000:
436
- best_conf *= 0.5
437
  elif final_weight == 0 and best_conf < 0.95:
438
- best_conf *= 0.6 # Penalize zero weights
439
  except ValueError:
440
  pass
441
 
442
- logging.info(f"Final detected weight: {best_weight} kg, Confidence: {round(best_conf * 100, 2)}%, Unit: {best_unit or 'none'}")
443
  return best_weight, round(best_conf * 100, 2)
444
-
445
  except Exception as e:
446
- logging.error(f"Weight extraction failed unexpectedly: {str(e)}")
447
  return "Not detected", 0.0
 
6
  from datetime import datetime
7
  import os
8
 
9
+ # Set up logging
10
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
11
 
12
  # Initialize EasyOCR
 
17
  os.makedirs(DEBUG_DIR, exist_ok=True)
18
 
def save_debug_image(img, filename_suffix, prefix=""):
    """Write ``img`` to the debug directory as a timestamped PNG.

    Args:
        img: Image array to write; callers in this file pass both colour
            and single-channel images.
        filename_suffix: Label placed after the timestamp in the file name.
        prefix: Optional text placed before the timestamp.

    Returns:
        None. The outcome of the write is reported through the log.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
    # cv2.imwrite takes colour and grayscale arrays through the same call, so
    # no branch on the number of channels is needed. It signals a failed
    # write through its boolean return value, which must be checked before
    # claiming the file was saved.
    if cv2.imwrite(filename, img):
        logging.info(f"Saved debug image: {filename}")
    else:
        logging.warning(f"Could not save debug image: {filename}")
28
 
def estimate_brightness(img):
    """Return the mean intensity of ``img`` after BGR-to-grayscale conversion."""
    return np.mean(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
33
 
def preprocess_image(img):
    """Build the grayscale image that the OCR steps threshold.

    The BGR input is converted to grayscale, smoothed with a bilateral
    filter, and contrast-enhanced with CLAHE. Both intermediate images are
    written out through ``save_debug_image``.

    Args:
        img: BGR image array.

    Returns:
        The CLAHE-enhanced single-channel image.
    """
    smoothed = cv2.bilateralFilter(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 5, 8, 8)
    save_debug_image(smoothed, "01_preprocess_bilateral")

    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrast_enhanced = equalizer.apply(smoothed)
    save_debug_image(contrast_enhanced, "02_preprocess_clahe")
    return contrast_enhanced
 
 
 
 
 
 
 
 
 
43
 
def correct_rotation(img):
    """Deskew a BGR image using its dominant near-horizontal line direction.

    Line segments are found with Canny edges followed by a probabilistic
    Hough transform. A segment has no inherent start or end, so each
    direction is folded into [-90, 90) degrees; otherwise a horizontal
    segment whose endpoints happen to be reported right-to-left would count
    as a 180 degree skew. Only segments within 45 degrees of horizontal vote
    on the skew, so vertical edges cannot cause a quarter-turn.

    Args:
        img: BGR image array.

    Returns:
        The rotated image when the median skew exceeds 2 degrees in
        magnitude, otherwise ``img`` unchanged. If any step raises, the
        error is logged and the current image is returned.
    """
    try:
        edges = cv2.Canny(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 50, 150)
        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=50, minLineLength=30, maxLineGap=10)
        if lines is None:
            return img

        # One row per segment: x1, y1, x2, y2.
        segments = np.asarray(lines, dtype=np.float64).reshape(-1, 4)
        angles = np.degrees(np.arctan2(segments[:, 3] - segments[:, 1],
                                       segments[:, 2] - segments[:, 0]))
        # Make the angle independent of endpoint order.
        angles = (angles + 90.0) % 180.0 - 90.0
        near_horizontal = angles[np.abs(angles) <= 45.0]
        if near_horizontal.size == 0:
            return img

        angle = float(np.median(near_horizontal))
        if abs(angle) > 2:
            h, w = img.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, angle, 1.0)
            img = cv2.warpAffine(img, M, (w, h))
            save_debug_image(img, "00_rotated_image")
            logging.info(f"Applied rotation: {angle:.2f} degrees")
        return img
    except Exception as e:
        # Rotation is an optional refinement; fall back to the image as-is.
        logging.error(f"Rotation correction failed: {str(e)}")
        return img
63
 
def detect_roi(img):
    """Locate the brightest plausible display region and crop around it.

    The image is preprocessed and adaptively thresholded, then external
    contours are screened by area, aspect ratio, minimum size and mean
    grayscale brightness. The brightest surviving contour is padded by 200
    pixels on each side (clamped to the image) and cropped.

    Args:
        img: BGR image array.

    Returns:
        ``(roi_img, (x, y, w, h))`` when a region is found, otherwise
        ``(img, None)``. ``(img, None)`` is also returned when an error is
        caught and logged.
    """
    try:
        save_debug_image(img, "03_original")
        enhanced = preprocess_image(img)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_h, img_w = img.shape[0], img.shape[1]
        # Odd neighbourhood size scaled to the image height, kept within 9..31.
        block_size = max(9, min(31, int(img_h / 25) * 2 + 1))
        binary = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                       cv2.THRESH_BINARY_INV, block_size, 2)
        save_debug_image(binary, "04_roi_threshold")
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        area_limit = (img_h * img_w) * 0.95
        candidates = []  # (bounding rect, mean brightness) per accepted contour
        for contour in contours:
            area = cv2.contourArea(contour)
            rect = cv2.boundingRect(contour)
            bx, by, bw, bh = rect
            roi_brightness = np.mean(gray[by:by+bh, bx:bx+bw])
            aspect_ratio = bw / bh
            size_ok = 50 < area < area_limit and bw > 30 and bh > 10
            shape_ok = 0.2 <= aspect_ratio <= 30.0
            if size_ok and shape_ok and roi_brightness > 30:
                candidates.append((rect, roi_brightness))
                logging.debug(f"Contour: Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")

        if not candidates:
            logging.info("No ROI found, using full image.")
            save_debug_image(img, "05_no_roi_fallback")
            return img, None

        # Brightest region wins; ties keep the earliest contour.
        (x, y, w, h), _ = max(candidates, key=lambda item: item[1])
        padding = 200
        x = max(0, x - padding)
        y = max(0, y - padding)
        w = min(w + 2 * padding, img_w - x)
        h = min(h + 2 * padding, img_h - y)
        roi_img = img[y:y+h, x:x+w]
        save_debug_image(roi_img, "05_detected_roi")
        logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
        return roi_img, (x, y, w, h)
    except Exception as e:
        logging.error(f"ROI detection failed: {str(e)}")
        save_debug_image(img, "05_roi_error_fallback")
        return img, None
107
 
108
  def detect_segments(digit_img, brightness):
109
+ """Detect seven-segment digits."""
110
  h, w = digit_img.shape
111
+ if h < 5 or w < 3:
112
  return None
113
 
114
  segments = {
 
131
  continue
132
  pixel_count = np.sum(region == 255)
133
  total_pixels = region.size
134
+ segment_presence[name] = pixel_count / total_pixels > (0.1 if brightness < 80 else 0.25)
135
 
136
  digit_patterns = {
137
  '0': ('top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
 
151
  for digit, pattern in digit_patterns.items():
152
  matches = sum(1 for segment in pattern if segment_presence.get(segment, False))
153
  non_matches_penalty = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
154
+ score = matches - 0.1 * non_matches_penalty
155
+ if matches >= len(pattern) * 0.55:
156
  score += 1.0
157
  if score > max_score:
158
  max_score = score
159
  best_match = digit
160
 
161
+ logging.debug(f"Segment presence: {segment_presence}, Digit: {best_match}")
162
  return best_match
163
 
164
  def custom_seven_segment_ocr(img, roi_bbox):
165
+ """Perform OCR for seven-segment displays."""
166
  try:
167
+ preprocessed = preprocess_image(img)
168
  brightness = estimate_brightness(img)
169
+ _, thresh = cv2.threshold(preprocessed, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
170
+ save_debug_image(thresh, "06_roi_thresh_digits")
 
 
 
 
 
 
 
 
171
  results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
172
+ contrast_ths=0.05, adjust_contrast=1.2,
173
+ text_threshold=0.15, mag_ratio=4.0,
174
+ allowlist='0123456789.', batch_size=2, y_ths=0.3)
175
 
176
+ logging.info(f"EasyOCR results: {results}")
177
  if not results:
178
+ logging.info("No digits found.")
179
  return None
180
 
181
  digits_info = []
182
  for (bbox, text, conf) in results:
183
  (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
184
  h_bbox = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
185
+ if (text.isdigit() or text == '.') and h_bbox > 4:
186
  x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
187
  y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
188
  digits_info.append((x_min, x_max, y_min, y_max, text, conf))
 
195
  if x_max <= x_min or y_max <= y_min:
196
  continue
197
  digit_img_crop = thresh[y_min:y_max, x_min:x_max]
198
+ save_debug_image(digit_img_crop, f"07_digit_crop_{idx}_{easyocr_char}")
199
+ if easyocr_conf > 0.8 or easyocr_char == '.':
200
  recognized_text += easyocr_char
201
  else:
202
  digit_from_segments = detect_segments(digit_img_crop, brightness)
203
  recognized_text += digit_from_segments if digit_from_segments else easyocr_char
204
 
205
+ logging.info(f"Recognized text: {recognized_text}")
206
  text = re.sub(r"[^\d\.]", "", recognized_text)
207
  if text.count('.') > 1:
208
  text = text.replace('.', '', text.count('.') - 1)
 
211
  if text == '':
212
  return None
213
  return text.lstrip('0') or '0'
214
+ logging.info(f"Text '{recognized_text}' failed validation.")
215
  return None
216
  except Exception as e:
217
+ logging.error(f"Seven-segment OCR failed: {str(e)}")
218
  return None
219
 
220
  def extract_weight_from_image(pil_img):
221
+ """Extract weight from a digital scale image."""
222
  try:
223
  img = np.array(pil_img)
224
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
225
  save_debug_image(img, "00_input_image")
 
 
226
  img = correct_rotation(img)
 
227
  brightness = estimate_brightness(img)
228
+ conf_threshold = 0.6 if brightness > 150 else (0.4 if brightness > 80 else 0.2)
229
 
230
  roi_img, roi_bbox = detect_roi(img)
231
  if roi_bbox:
232
+ conf_threshold *= 1.05 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.5) else 1.0
 
233
 
234
  custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
235
  if custom_result and custom_result != '0':
236
  try:
237
  weight = float(custom_result)
238
+ if 0.00001 <= weight <= 10000:
239
+ logging.info(f"Custom OCR: {custom_result}, Confidence: 90.0%")
240
+ return custom_result, 90.0
241
+ logging.warning(f"Custom OCR {custom_result} out of range.")
 
242
  except ValueError:
243
+ logging.warning(f"Custom OCR '{custom_result}' invalid number.")
244
 
245
+ logging.info("Custom OCR failed, using EasyOCR fallback.")
246
+ preprocessed_roi = preprocess_image(roi_img)
 
247
  final_roi = cv2.adaptiveThreshold(preprocessed_roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
248
+ cv2.THRESH_BINARY_INV, max(9, min(31, int(roi_img.shape[0] / 25) * 2 + 1)), 2)
249
+ save_debug_image(final_roi, "08_fallback_thresh")
250
 
251
+ results = easyocr_reader.readtext(final_roi, detail=1, paragraph=False,
252
+ contrast_ths=0.05, adjust_contrast=1.2,
253
+ text_threshold=0.15, mag_ratio=4.0,
254
+ allowlist='0123456789. kglb', batch_size=2, y_ths=0.3)
 
 
 
255
 
256
+ if not results:
257
+ logging.info("First EasyOCR pass failed, trying fallback.")
258
  results = easyocr_reader.readtext(final_roi, detail=1, paragraph=False,
259
+ contrast_ths=0.02, adjust_contrast=1.5,
260
+ text_threshold=0.1, mag_ratio=5.0,
261
+ allowlist='0123456789. kglb', batch_size=2, y_ths=0.3)
262
+ save_debug_image(final_roi, "08_fallback_thresh_fallback")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
 
264
+ logging.info(f"EasyOCR results: {results}")
265
+ candidates = []
266
+ unit = None
267
+ for (bbox, text, conf) in results:
268
+ if 'kg' in text.lower():
269
+ unit = 'kg'
270
+ continue
271
+ elif 'g' in text.lower():
272
+ unit = 'g'
273
+ continue
274
+ elif 'lb' in text.lower():
275
+ unit = 'lb'
276
+ continue
277
+ text = re.sub(r"[^\d\.]", "", text)
278
+ if text.count('.') > 1:
279
+ text = text.replace('.', '', text.count('.') - 1)
280
+ text = text.strip('.')
281
+ if re.fullmatch(r"^\d*\.?\d*$", text):
282
+ try:
283
+ weight = float(text)
284
+ if unit == 'g':
285
+ weight /= 1000
286
+ elif unit == 'lb':
287
+ weight *= 0.453592
288
+ range_score = 1.5 if 0.00001 <= weight <= 10000 else 0.5
289
+ digit_count = len(text.replace('.', ''))
290
+ digit_score = 1.4 if 1 <= digit_count <= 8 else 0.6
291
+ score = conf * range_score * digit_score
292
+ if roi_bbox:
293
+ x_roi, y_roi, w_roi, h_roi = roi_bbox
294
+ roi_area = w_roi * h_roi
295
+ x_min, y_min = int(min(b[0] for b in bbox)), int(min(b[1] for b in bbox))
296
+ x_max, y_max = int(max(b[0] for b in bbox)), int(max(b[1] for b in bbox))
297
+ bbox_area = (x_max - x_min) * (y_max - y_min)
298
+ if roi_area > 0 and bbox_area / roi_area < 0.02:
299
+ score *= 0.4
300
+ candidates.append((text, conf, score, unit))
301
+ logging.info(f"Candidate: '{text}', Unit: {unit or 'none'}, Conf: {conf}, Score: {score}")
302
+ except ValueError:
303
+ logging.warning(f"Could not convert '{text}' to float.")
304
+
305
+ if not candidates and not roi_bbox:
306
+ logging.info("No candidates, trying full image.")
307
+ preprocessed_full = preprocess_image(img)
308
  final_full = cv2.adaptiveThreshold(preprocessed_full, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
309
+ cv2.THRESH_BINARY_INV, max(9, min(31, int(img.shape[0] / 25) * 2 + 1)), 2)
310
+ save_debug_image(final_full, "08_fallback_full")
311
  results = easyocr_reader.readtext(final_full, detail=1, paragraph=False,
312
+ contrast_ths=0.05, adjust_contrast=1.5,
313
+ text_threshold=0.15, mag_ratio=4.0,
314
+ allowlist='0123456789. kglb', batch_size=2, y_ths=0.3)
315
+ logging.info(f"Full image EasyOCR: {results}")
 
 
316
  for (bbox, text, conf) in results:
317
  if 'kg' in text.lower():
318
  unit = 'kg'
 
334
  weight /= 1000
335
  elif unit == 'lb':
336
  weight *= 0.453592
337
+ range_score = 1.2 if 0.00001 <= weight <= 10000 else 0.4
338
  digit_count = len(text.replace('.', ''))
339
+ digit_score = 1.2 if 1 <= digit_count <= 8 else 0.5
340
+ score = conf * range_score * digit_score * 0.7
341
  candidates.append((text, conf, score, unit))
342
+ logging.info(f"Full image candidate: '{text}', Unit: {unit or 'none'}, Conf: {conf}, Score: {score}")
343
  except ValueError:
344
+ logging.warning(f"Could not convert '{text}' to float (full image).")
345
 
346
  if not candidates:
347
+ logging.info("No valid weight detected.")
348
  return "Not detected", 0.0
349
 
 
350
  best_weight, best_conf, best_score, best_unit = max(candidates, key=lambda x: x[2])
 
 
351
  if "." in best_weight:
352
  int_part, dec_part = best_weight.split(".")
353
  int_part = int_part.lstrip("0") or "0"
 
358
 
359
  try:
360
  final_weight = float(best_weight)
361
+ if final_weight < 0.00001 or final_weight > 10000:
362
+ best_conf *= 0.4
363
  elif final_weight == 0 and best_conf < 0.95:
364
+ best_conf *= 0.5
365
  except ValueError:
366
  pass
367
 
368
+ logging.info(f"Final weight: {best_weight} kg, Confidence: {round(best_conf * 100, 2)}%, Unit: {best_unit or 'none'}")
369
  return best_weight, round(best_conf * 100, 2)
 
370
  except Exception as e:
371
+ logging.error(f"Weight extraction failed: {str(e)}")
372
  return "Not detected", 0.0