Sanjayraju30 committed on
Commit
6dfd01b
·
verified ·
1 Parent(s): 4ec2c37

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +75 -192
ocr_engine.py CHANGED
@@ -10,8 +10,6 @@ import os
10
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
11
 
12
  # Initialize EasyOCR
13
- # Consider using 'en' and potentially 'ch_sim' or other relevant languages if your scales have non-English characters.
14
- # gpu=True can speed up processing if a compatible GPU is available.
15
  easyocr_reader = easyocr.Reader(['en'], gpu=False)
16
 
17
  # Directory for debug images
@@ -22,13 +20,12 @@ def save_debug_image(img, filename_suffix, prefix=""):
22
  """Saves an image to the debug directory with a timestamp."""
23
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
24
  filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
25
- if len(img.shape) == 3: # Color image
26
  cv2.imwrite(filename, img)
27
- else: # Grayscale image
28
  cv2.imwrite(filename, img)
29
  logging.info(f"Saved debug image: {filename}")
30
 
31
-
32
  def estimate_brightness(img):
33
  """Estimate image brightness to detect illuminated displays"""
34
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
@@ -41,60 +38,41 @@ def detect_roi(img):
41
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
42
  save_debug_image(gray, "02_grayscale")
43
 
44
- brightness = estimate_brightness(img)
45
-
46
- # Adaptive thresholding based on brightness
47
- # For darker images, a lower threshold might be needed.
48
- # For very bright images, a higher threshold.
49
- # Tuned thresholds based on observed values
50
- if brightness > 180:
51
- thresh_value = 230
52
- elif brightness > 100:
53
- thresh_value = 190
54
- else:
55
- thresh_value = 150 # Even lower for very dark images
56
-
57
- _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
58
- save_debug_image(thresh, f"03_roi_threshold_{thresh_value}")
59
 
60
- # Increased kernel size for dilation to better connect segments of digits
61
- # This helps in forming a solid contour for the display
62
- kernel = np.ones((13, 13), np.uint8) # Slightly larger kernel
63
- dilated = cv2.dilate(thresh, kernel, iterations=5) # Increased iterations for stronger connection
64
  save_debug_image(dilated, "04_roi_dilated")
65
 
66
  contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
67
 
68
  if contours:
69
- # Filter contours by a more robust area range and shape
70
  img_area = img.shape[0] * img.shape[1]
71
  valid_contours = []
72
  for c in contours:
73
  area = cv2.contourArea(c)
74
- # Filter out very small and very large contours (e.g., entire image, or noise)
75
- if 1500 < area < (img_area * 0.9): # Increased min area, max area
76
  x, y, w, h = cv2.boundingRect(c)
77
  aspect_ratio = w / h
78
- # Check for typical display aspect ratios and minimum size
79
- if 2.0 <= aspect_ratio <= 5.5 and w > 100 and h > 50: # Adjusted aspect ratio and min size
80
  valid_contours.append(c)
81
 
82
  if valid_contours:
83
- # Sort by area descending and iterate
84
  for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
85
  x, y, w, h = cv2.boundingRect(contour)
86
-
87
- # Expand ROI to ensure full digits are captured and a small border
88
- padding = 40 # Increased padding
89
  x, y = max(0, x - padding), max(0, y - padding)
90
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
91
-
92
  roi_img = img[y:y+h, x:x+w]
93
  save_debug_image(roi_img, "05_detected_roi")
94
  logging.info(f"Detected ROI with dimensions: ({x}, {y}, {w}, {h})")
95
  return roi_img, (x, y, w, h)
96
 
97
- logging.info("No suitable ROI found, returning original image for full image OCR attempt.")
98
  save_debug_image(img, "05_no_roi_original_fallback")
99
  return img, None
100
  except Exception as e:
@@ -105,11 +83,9 @@ def detect_roi(img):
105
  def detect_segments(digit_img):
106
  """Detect seven-segment patterns in a digit image"""
107
  h, w = digit_img.shape
108
- if h < 15 or w < 10: # Increased minimum dimensions for a digit
109
  return None
110
 
111
- # Define segment regions (top, middle, bottom, left-top, left-bottom, right-top, right-bottom)
112
- # Adjusted segment proportions for better robustness, more aggressive cropping
113
  segments = {
114
  'top': (int(w*0.15), int(w*0.85), 0, int(h*0.2)),
115
  'middle': (int(w*0.15), int(w*0.85), int(h*0.4), int(h*0.6)),
@@ -122,24 +98,16 @@ def detect_segments(digit_img):
122
 
123
  segment_presence = {}
124
  for name, (x1, x2, y1, y2) in segments.items():
125
- # Ensure coordinates are within bounds
126
  x1, y1 = max(0, x1), max(0, y1)
127
  x2, y2 = min(w, x2), min(h, y2)
128
-
129
  region = digit_img[y1:y2, x1:x2]
130
  if region.size == 0:
131
  segment_presence[name] = False
132
  continue
133
-
134
- # Count white pixels in the region
135
  pixel_count = np.sum(region == 255)
136
  total_pixels = region.size
137
-
138
- # Segment is present if a significant portion of the region is white
139
- # Adjusted threshold for segment presence - higher for robustness
140
- segment_presence[name] = pixel_count / total_pixels > 0.55 # Increased sensitivity further
141
 
142
- # Seven-segment digit patterns - remain the same
143
  digit_patterns = {
144
  '0': ('top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
145
  '1': ('right_top', 'right_bottom'),
@@ -154,278 +122,196 @@ def detect_segments(digit_img):
154
  }
155
 
156
  best_match = None
157
- max_score = -1 # Initialize with a lower value
158
-
159
  for digit, pattern in digit_patterns.items():
160
  matches = sum(1 for segment in pattern if segment_presence.get(segment, False))
161
-
162
- # Penalize for segments that should NOT be present but are
163
  non_matches_penalty = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
164
-
165
- # Prioritize digits with more matched segments and fewer incorrect segments
166
  current_score = matches - non_matches_penalty
167
-
168
- # Add a small bonus for matching exactly all required segments for the digit
169
  if all(segment_presence.get(s, False) for s in pattern):
170
- current_score += 0.5
171
-
172
  if current_score > max_score:
173
  max_score = current_score
174
  best_match = digit
175
  elif current_score == max_score and best_match is not None:
176
- # Tie-breaking: prefer digits with fewer "extra" segments when scores are equal
177
  current_digit_non_matches = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
178
  best_digit_pattern = digit_patterns[best_match]
179
- best_digit_non_matches = sum(1 for segment in segment_presence if segment not in best_digit_pattern and segment_presence[best_digit_pattern]) # Corrected logic
180
  if current_digit_non_matches < best_digit_non_matches:
181
  best_match = digit
182
 
183
- # Debugging segment presence
184
- # logging.debug(f"Digit Image Shape: {digit_img.shape}, Segments: {segment_presence}, Best Match: {best_match}")
185
- # save_debug_image(digit_img, f"digit_segment_debug_{best_match or 'none'}", prefix="10_")
186
-
187
  return best_match
188
 
189
  def custom_seven_segment_ocr(img, roi_bbox):
190
  """Perform custom OCR for seven-segment displays"""
191
  try:
192
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
193
-
194
- # Adaptive thresholding for digits within ROI
195
- # Using OTSU for automatic thresholding or a fixed value depending on brightness
196
  brightness = estimate_brightness(img)
197
  if brightness > 150:
198
  _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
199
  else:
200
- _, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY) # Lower threshold for darker displays
201
  save_debug_image(thresh, "06_roi_thresh_for_digits")
202
 
203
- # Use EasyOCR to get bounding boxes for digits
204
- # Increased text_threshold for more confident digit detection
205
- # Adjusted mag_ratio for better handling of digit sizes
206
- # Added y_ths to reduce sensitivity to vertical position variations (common in scales)
207
  results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
208
- contrast_ths=0.2, adjust_contrast=0.8, # Slightly more contrast adjustment
209
- text_threshold=0.85, mag_ratio=1.5, # Adjusted mag_ratio back, seems to work better for 7-seg
210
- allowlist='0123456789.', y_ths=0.2) # Increased y_ths for row grouping tolerance
211
-
 
212
  if not results:
213
- logging.info("EasyOCR found no digits for custom seven-segment OCR.")
214
  return None
215
 
216
- # Sort bounding boxes left to right
217
  digits_info = []
218
  for (bbox, text, conf) in results:
219
- # Ensure the text found by EasyOCR is a single digit or a decimal point
220
- # Also filter by a minimum height of the bounding box for robustness
221
  (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
222
- h_bbox = max(y1,y2,y3,y4) - min(y1,y2,y3,y4)
223
- if len(text) == 1 and (text.isdigit() or text == '.') and h_bbox > 10: # Min height for bbox
224
  x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
225
  y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
226
  digits_info.append((x_min, x_max, y_min, y_max, text, conf))
227
 
228
- # Sort by x_min (left to right)
229
- digits_info.sort(key=lambda x: x[0])
230
-
231
  recognized_text = ""
232
  for idx, (x_min, x_max, y_min, y_max, easyocr_char, easyocr_conf) in enumerate(digits_info):
233
  x_min, y_min = max(0, x_min), max(0, y_min)
234
  x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
235
-
236
  if x_max <= x_min or y_max <= y_min:
237
  continue
238
-
239
  digit_img_crop = thresh[y_min:y_max, x_min:x_max]
240
  save_debug_image(digit_img_crop, f"07_digit_crop_{idx}_{easyocr_char}")
241
-
242
- # If EasyOCR is very confident about a digit or it's a decimal, use its result directly
243
- # Or if the digit crop is too small for reliable segment detection
244
- if easyocr_conf > 0.9 or easyocr_char == '.' or digit_img_crop.shape[0] < 20 or digit_img_crop.shape[1] < 15: # Lowered confidence for direct use
245
  recognized_text += easyocr_char
246
  else:
247
- # Otherwise, try the segment detection
248
  digit_from_segments = detect_segments(digit_img_crop)
249
  if digit_from_segments:
250
  recognized_text += digit_from_segments
251
  else:
252
- # If segment detection also fails, fall back to EasyOCR's less confident result
253
  recognized_text += easyocr_char
254
-
255
- # Validate the recognized text
256
- text = recognized_text
257
- text = re.sub(r"[^\d\.]", "", text) # Remove any non-digit/non-dot characters
258
 
259
- # Ensure there's at most one decimal point
 
260
  if text.count('.') > 1:
261
- text = text.replace('.', '', text.count('.') - 1) # Remove extra decimal points
262
-
263
- # Basic validation for common weight formats (e.g., 75.5, 120.0, 5.0)
264
- # Allow numbers to start with . (e.g., .5 -> 0.5) if it's the only character
265
- if text and re.fullmatch(r"^\d*\.?\d*$", text) and len(text.replace('.', '')) > 0:
266
- # Handle cases like ".5" -> "0.5"
267
- if text.startswith('.') and len(text) > 1:
268
  text = "0" + text
269
- # Handle cases like "5." -> "5"
270
- if text.endswith('.') and len(text) > 1:
271
  text = text.rstrip('.')
272
-
273
- # Ensure it's not just a single dot or empty after processing
274
  if text == '.' or text == '':
275
  return None
276
  return text
277
- logging.info(f"Custom OCR final text '{recognized_text}' failed validation.")
278
  return None
279
  except Exception as e:
280
  logging.error(f"Custom seven-segment OCR failed: {str(e)}")
281
  return None
282
 
283
  def extract_weight_from_image(pil_img):
 
284
  try:
285
  img = np.array(pil_img)
286
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
 
287
 
288
  brightness = estimate_brightness(img)
289
- # Adjust confidence threshold more dynamically
290
- conf_threshold = 0.9 if brightness > 150 else (0.8 if brightness > 80 else 0.7) # Adjusted thresholds
291
 
292
- # Detect ROI
293
  roi_img, roi_bbox = detect_roi(img)
294
-
295
- # Try custom seven-segment OCR first
296
  custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
297
  if custom_result:
298
- # Format the custom result: remove leading zeros (unless it's "0" or "0.x") and trailing zeros after decimal
299
  if "." in custom_result:
300
  int_part, dec_part = custom_result.split(".")
301
  int_part = int_part.lstrip("0") or "0"
302
  dec_part = dec_part.rstrip('0')
303
- if not dec_part and int_part != "0": # If decimal part is empty (e.g., "50."), remove the dot
304
  custom_result = int_part
305
- elif not dec_part and int_part == "0": # if it's "0." keep it as "0"
306
  custom_result = "0"
307
  else:
308
  custom_result = f"{int_part}.{dec_part}"
309
  else:
310
  custom_result = custom_result.lstrip('0') or "0"
311
-
312
- # Additional validation for custom result to ensure it's a valid number
313
  try:
314
  float(custom_result)
315
  logging.info(f"Custom OCR result: {custom_result}, Confidence: 100.0%")
316
- return custom_result, 100.0 # High confidence for custom OCR
317
  except ValueError:
318
  logging.warning(f"Custom OCR result '{custom_result}' is not a valid number, falling back.")
319
- custom_result = None # Force fallback
320
 
321
- # Fallback to EasyOCR if custom OCR fails
322
  logging.info("Custom OCR failed or invalid, falling back to general EasyOCR.")
323
-
324
- # Apply more aggressive image processing for EasyOCR if custom OCR failed
325
  processed_roi_img_gray = cv2.cvtColor(roi_img, cv2.COLOR_BGR2GRAY)
326
-
327
- # Sharpening
328
- kernel_sharpening = np.array([[-1,-1,-1],
329
- [-1,9,-1],
330
- [-1,-1,-1]])
331
  sharpened_roi = cv2.filter2D(processed_roi_img_gray, -1, kernel_sharpening)
332
  save_debug_image(sharpened_roi, "08_fallback_sharpened")
333
-
334
- # Apply adaptive thresholding to the sharpened image for better digit isolation
335
- # Block size and C constant can be critical
336
  processed_roi_img_final = cv2.adaptiveThreshold(sharpened_roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
337
- cv2.THRESH_BINARY, 15, 3) # Adjusted block size and C
338
  save_debug_image(processed_roi_img_final, "09_fallback_adaptive_thresh")
339
 
340
- # EasyOCR parameters for general text
341
- # Adjusted parameters for better digit recognition
342
- # added batch_size for potentially better performance on multiple texts
343
  results = easyocr_reader.readtext(processed_roi_img_final, detail=1, paragraph=False,
344
- contrast_ths=0.3, adjust_contrast=0.9,
345
- text_threshold=0.6, mag_ratio=1.8, # Lowered text_threshold, increased mag_ratio
346
- allowlist='0123456789.', batch_size=4, y_ths=0.3) # Increased y_ths
347
 
348
  best_weight = None
349
  best_conf = 0.0
350
  best_score = 0.0
351
-
352
  for (bbox, text, conf) in results:
353
  text = text.lower().strip()
354
-
355
- # More robust character replacements
356
- text = text.replace(",", ".").replace(";", ".").replace(":", ".").replace(" ", "") # Remove spaces
357
- text = text.replace("o", "0").replace("O", "0").replace("q", "0").replace("Q", "0")
358
  text = text.replace("s", "5").replace("S", "5")
359
- text = text.replace("g", "9").replace("G", "6")
360
- text = text.replace("l", "1").replace("I", "1").replace("|", "1")
361
  text = text.replace("b", "8").replace("B", "8")
362
  text = text.replace("z", "2").replace("Z", "2")
363
- text = text.replace("a", "4").replace("A", "4")
364
- text = text.replace("e", "3")
365
- text = text.replace("t", "7") # 't' can look like '7'
366
- text = text.replace("~", "") # Common noise
367
- text = text.replace("`", "")
368
-
369
- # Remove common weight units and other non-numeric characters
370
- text = re.sub(r"(kgs|kg|k|lb|g|gr|pounds|lbs)\b", "", text) # Added lbs
371
  text = re.sub(r"[^\d\.]", "", text)
372
-
373
- # Handle multiple decimal points (keep only the first one)
374
  if text.count('.') > 1:
375
  parts = text.split('.')
376
  text = parts[0] + '.' + ''.join(parts[1:])
377
-
378
- # Clean up leading/trailing dots if any
379
  text = text.strip('.')
380
-
381
- # Validate the final text format
382
- # Allow optional leading zero, and optional decimal with up to 3 places
383
- if re.fullmatch(r"^\d*\.?\d{0,3}$", text) and len(text.replace('.', '')) > 0: # Ensure at least one digit
384
  try:
385
  weight = float(text)
386
- # Refined scoring for weights within a reasonable range
387
  range_score = 1.0
388
- if 0.1 <= weight <= 250: # Very common personal scale range
389
  range_score = 1.5
390
- elif weight > 250 and weight <= 500: # Larger weights
391
  range_score = 1.2
392
  elif weight > 500 and weight <= 1000:
393
  range_score = 1.0
394
- else: # Very small or very large weights
395
  range_score = 0.5
396
-
397
  digit_count = len(text.replace('.', ''))
398
  digit_score = 1.0
399
- if digit_count >= 2 and digit_count <= 5: # Prefer weights with 2-5 digits (e.g., 5.0, 75.5, 123.4)
400
  digit_score = 1.3
401
- elif digit_count == 1: # Single digit weights less common but possible
402
  digit_score = 0.8
403
-
404
  score = conf * range_score * digit_score
405
-
406
- # Also consider area of the bounding box relative to ROI for confidence
407
  if roi_bbox:
408
  (x_roi, y_roi, w_roi, h_roi) = roi_bbox
409
  roi_area = w_roi * h_roi
410
- # Calculate bbox area accurately
411
  x_min, y_min = int(min(b[0] for b in bbox)), int(min(b[1] for b in bbox))
412
  x_max, y_max = int(max(b[0] for b in bbox)), int(max(b[1] for b in bbox))
413
  bbox_area = (x_max - x_min) * (y_max - y_min)
414
-
415
- if roi_area > 0 and bbox_area / roi_area < 0.03: # Very small bounding boxes might be noise
416
- score *= 0.5
417
-
418
- # Penalize if bbox is too narrow (e.g., single line detected as digit)
419
  bbox_aspect_ratio = (x_max - x_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0
420
- if bbox_aspect_ratio < 0.2: # Very thin bounding boxes
421
  score *= 0.7
422
-
423
  if score > best_score and conf > conf_threshold:
424
  best_weight = text
425
  best_conf = conf
426
  best_score = score
427
  logging.info(f"Candidate EasyOCR weight: '{text}', Conf: {conf}, Score: {score}")
428
-
429
  except ValueError:
430
  logging.warning(f"Could not convert '{text}' to float during EasyOCR fallback.")
431
  continue
@@ -434,29 +320,26 @@ def extract_weight_from_image(pil_img):
434
  logging.info("No valid weight detected after all attempts.")
435
  return "Not detected", 0.0
436
 
437
- # Final formatting of the best detected weight
438
  if "." in best_weight:
439
  int_part, dec_part = best_weight.split(".")
440
- int_part = int_part.lstrip("0") or "0" # Remove leading zeros, keep "0" for 0.x
441
- dec_part = dec_part.rstrip('0') # Remove trailing zeros after decimal
442
-
443
- if not dec_part and int_part != "0": # If decimal part is empty (e.g., "50."), remove the dot
444
  best_weight = int_part
445
- elif not dec_part and int_part == "0": # if it's "0." keep it as "0"
446
  best_weight = "0"
447
  else:
448
  best_weight = f"{int_part}.{dec_part}"
449
  else:
450
- best_weight = best_weight.lstrip('0') or "0" # Remove leading zeros, keep "0"
451
 
452
- # Final check for extremely unlikely weights (e.g., 0.0001, 9999)
453
  try:
454
  final_float_weight = float(best_weight)
455
- if final_float_weight < 0.01 or final_float_weight > 1000: # Adjust this range if needed
456
  logging.warning(f"Detected weight {final_float_weight} is outside typical range, reducing confidence.")
457
- best_conf *= 0.5 # Reduce confidence for out-of-range values
458
  except ValueError:
459
- pass # Should not happen if previous parsing worked
460
 
461
  logging.info(f"Final detected weight: {best_weight}, Confidence: {round(best_conf * 100, 2)}%")
462
  return best_weight, round(best_conf * 100, 2)
 
10
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
11
 
12
  # Initialize EasyOCR
 
 
13
  easyocr_reader = easyocr.Reader(['en'], gpu=False)
14
 
15
  # Directory for debug images
 
20
  """Saves an image to the debug directory with a timestamp."""
21
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
22
  filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
23
+ if len(img.shape) == 3: # Color image
24
  cv2.imwrite(filename, img)
25
+ else: # Grayscale image
26
  cv2.imwrite(filename, img)
27
  logging.info(f"Saved debug image: {filename}")
28
 
 
29
  def estimate_brightness(img):
30
  """Estimate image brightness to detect illuminated displays"""
31
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
38
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
39
  save_debug_image(gray, "02_grayscale")
40
 
41
+ # Use adaptive thresholding for better robustness
42
+ thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
43
+ cv2.THRESH_BINARY, 11, 2)
44
+ save_debug_image(thresh, "03_roi_adaptive_threshold")
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ kernel = np.ones((7, 7), np.uint8) # Smaller kernel
47
+ dilated = cv2.dilate(thresh, kernel, iterations=3) # Fewer iterations
 
 
48
  save_debug_image(dilated, "04_roi_dilated")
49
 
50
  contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
51
 
52
  if contours:
 
53
  img_area = img.shape[0] * img.shape[1]
54
  valid_contours = []
55
  for c in contours:
56
  area = cv2.contourArea(c)
57
+ # Relaxed area and aspect ratio filters
58
+ if 500 < area < (img_area * 0.95):
59
  x, y, w, h = cv2.boundingRect(c)
60
  aspect_ratio = w / h
61
+ if 1.5 <= aspect_ratio <= 6.0 and w > 80 and h > 40:
 
62
  valid_contours.append(c)
63
 
64
  if valid_contours:
 
65
  for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
66
  x, y, w, h = cv2.boundingRect(contour)
67
+ padding = 60 # Increased padding
 
 
68
  x, y = max(0, x - padding), max(0, y - padding)
69
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
 
70
  roi_img = img[y:y+h, x:x+w]
71
  save_debug_image(roi_img, "05_detected_roi")
72
  logging.info(f"Detected ROI with dimensions: ({x}, {y}, {w}, {h})")
73
  return roi_img, (x, y, w, h)
74
 
75
+ logging.info("No suitable ROI found, returning original image.")
76
  save_debug_image(img, "05_no_roi_original_fallback")
77
  return img, None
78
  except Exception as e:
 
83
  def detect_segments(digit_img):
84
  """Detect seven-segment patterns in a digit image"""
85
  h, w = digit_img.shape
86
+ if h < 15 or w < 10:
87
  return None
88
 
 
 
89
  segments = {
90
  'top': (int(w*0.15), int(w*0.85), 0, int(h*0.2)),
91
  'middle': (int(w*0.15), int(w*0.85), int(h*0.4), int(h*0.6)),
 
98
 
99
  segment_presence = {}
100
  for name, (x1, x2, y1, y2) in segments.items():
 
101
  x1, y1 = max(0, x1), max(0, y1)
102
  x2, y2 = min(w, x2), min(h, y2)
 
103
  region = digit_img[y1:y2, x1:x2]
104
  if region.size == 0:
105
  segment_presence[name] = False
106
  continue
 
 
107
  pixel_count = np.sum(region == 255)
108
  total_pixels = region.size
109
+ segment_presence[name] = pixel_count / total_pixels > 0.45 # Lowered threshold
 
 
 
110
 
 
111
  digit_patterns = {
112
  '0': ('top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
113
  '1': ('right_top', 'right_bottom'),
 
122
  }
123
 
124
  best_match = None
125
+ max_score = -1
 
126
  for digit, pattern in digit_patterns.items():
127
  matches = sum(1 for segment in pattern if segment_presence.get(segment, False))
 
 
128
  non_matches_penalty = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
 
 
129
  current_score = matches - non_matches_penalty
 
 
130
  if all(segment_presence.get(s, False) for s in pattern):
131
+ current_score += 0.5
 
132
  if current_score > max_score:
133
  max_score = current_score
134
  best_match = digit
135
  elif current_score == max_score and best_match is not None:
 
136
  current_digit_non_matches = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
137
  best_digit_pattern = digit_patterns[best_match]
138
+ best_digit_non_matches = sum(1 for segment in segment_presence if segment not in best_digit_pattern and segment_presence[segment])
139
  if current_digit_non_matches < best_digit_non_matches:
140
  best_match = digit
141
 
142
+ logging.debug(f"Segment presence: {segment_presence}, Detected digit: {best_match}")
 
 
 
143
  return best_match
144
 
145
  def custom_seven_segment_ocr(img, roi_bbox):
146
  """Perform custom OCR for seven-segment displays"""
147
  try:
148
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
 
 
149
  brightness = estimate_brightness(img)
150
  if brightness > 150:
151
  _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
152
  else:
153
+ _, thresh = cv2.threshold(gray, 80, 255, cv2.THRESH_BINARY) # Lower threshold
154
  save_debug_image(thresh, "06_roi_thresh_for_digits")
155
 
 
 
 
 
156
  results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
157
+ contrast_ths=0.2, adjust_contrast=0.8,
158
+ text_threshold=0.7, mag_ratio=2.0,
159
+ allowlist='0123456789.', y_ths=0.3)
160
+
161
+ logging.info(f"EasyOCR results: {results}")
162
  if not results:
163
+ logging.info("EasyOCR found no digits.")
164
  return None
165
 
 
166
  digits_info = []
167
  for (bbox, text, conf) in results:
 
 
168
  (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
169
+ h_bbox = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
170
+ if len(text) == 1 and (text.isdigit() or text == '.') and h_bbox > 8:
171
  x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
172
  y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
173
  digits_info.append((x_min, x_max, y_min, y_max, text, conf))
174
 
175
+ digits_info.sort(key=lambda x: x[0])
 
 
176
  recognized_text = ""
177
  for idx, (x_min, x_max, y_min, y_max, easyocr_char, easyocr_conf) in enumerate(digits_info):
178
  x_min, y_min = max(0, x_min), max(0, y_min)
179
  x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
 
180
  if x_max <= x_min or y_max <= y_min:
181
  continue
 
182
  digit_img_crop = thresh[y_min:y_max, x_min:x_max]
183
  save_debug_image(digit_img_crop, f"07_digit_crop_{idx}_{easyocr_char}")
184
+ if easyocr_conf > 0.9 or easyocr_char == '.' or digit_img_crop.shape[0] < 15 or digit_img_crop.shape[1] < 10:
 
 
 
185
  recognized_text += easyocr_char
186
  else:
 
187
  digit_from_segments = detect_segments(digit_img_crop)
188
  if digit_from_segments:
189
  recognized_text += digit_from_segments
190
  else:
 
191
  recognized_text += easyocr_char
 
 
 
 
192
 
193
+ logging.info(f"Before validation, recognized_text: {recognized_text}")
194
+ text = re.sub(r"[^\d\.]", "", recognized_text)
195
  if text.count('.') > 1:
196
+ text = text.replace('.', '', text.count('.') - 1)
197
+ if text and re.fullmatch(r"^\d*\.?\d*$", text) and len(text) > 0:
198
+ if text.startswith('.'):
 
 
 
 
199
  text = "0" + text
200
+ if text.endswith('.'):
 
201
  text = text.rstrip('.')
 
 
202
  if text == '.' or text == '':
203
  return None
204
  return text
205
+ logging.info(f"Custom OCR text '{recognized_text}' failed validation.")
206
  return None
207
  except Exception as e:
208
  logging.error(f"Custom seven-segment OCR failed: {str(e)}")
209
  return None
210
 
211
  def extract_weight_from_image(pil_img):
212
+ """Extract weight from a PIL image of a digital scale display"""
213
  try:
214
  img = np.array(pil_img)
215
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
216
+ save_debug_image(img, "00_input_image") # Log input image
217
 
218
  brightness = estimate_brightness(img)
219
+ conf_threshold = 0.6 if brightness > 150 else (0.5 if brightness > 80 else 0.4)
 
220
 
 
221
  roi_img, roi_bbox = detect_roi(img)
 
 
222
  custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
223
  if custom_result:
 
224
  if "." in custom_result:
225
  int_part, dec_part = custom_result.split(".")
226
  int_part = int_part.lstrip("0") or "0"
227
  dec_part = dec_part.rstrip('0')
228
+ if not dec_part and int_part != "0":
229
  custom_result = int_part
230
+ elif not dec_part and int_part == "0":
231
  custom_result = "0"
232
  else:
233
  custom_result = f"{int_part}.{dec_part}"
234
  else:
235
  custom_result = custom_result.lstrip('0') or "0"
 
 
236
  try:
237
  float(custom_result)
238
  logging.info(f"Custom OCR result: {custom_result}, Confidence: 100.0%")
239
+ return custom_result, 100.0
240
  except ValueError:
241
  logging.warning(f"Custom OCR result '{custom_result}' is not a valid number, falling back.")
242
+ custom_result = None
243
 
 
244
  logging.info("Custom OCR failed or invalid, falling back to general EasyOCR.")
 
 
245
  processed_roi_img_gray = cv2.cvtColor(roi_img, cv2.COLOR_BGR2GRAY)
246
+ kernel_sharpening = np.array([[-1,-1,-1], [-1,9,-1], [-1,-1,-1]])
 
 
 
 
247
  sharpened_roi = cv2.filter2D(processed_roi_img_gray, -1, kernel_sharpening)
248
  save_debug_image(sharpened_roi, "08_fallback_sharpened")
 
 
 
249
  processed_roi_img_final = cv2.adaptiveThreshold(sharpened_roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
250
+ cv2.THRESH_BINARY, 21, 5)
251
  save_debug_image(processed_roi_img_final, "09_fallback_adaptive_thresh")
252
 
 
 
 
253
  results = easyocr_reader.readtext(processed_roi_img_final, detail=1, paragraph=False,
254
+ contrast_ths=0.3, adjust_contrast=0.9,
255
+ text_threshold=0.5, mag_ratio=2.0,
256
+ allowlist='0123456789.', batch_size=4, y_ths=0.3)
257
 
258
  best_weight = None
259
  best_conf = 0.0
260
  best_score = 0.0
 
261
  for (bbox, text, conf) in results:
262
  text = text.lower().strip()
263
+ text = text.replace(",", ".").replace(";", ".").replace(":", ".").replace(" ", "")
264
+ text = text.replace("o", "0").replace("O", "0").replace("q", "0").replace("Q", "0")
 
 
265
  text = text.replace("s", "5").replace("S", "5")
266
+ text = text.replace("g", "9").replace("G", "6")
267
+ text = text.replace("l", "1").replace("I", "1").replace("|", "1")
268
  text = text.replace("b", "8").replace("B", "8")
269
  text = text.replace("z", "2").replace("Z", "2")
270
+ text = text.replace("a", "4").replace("A", "4")
271
+ text = text.replace("e", "3")
272
+ text = text.replace("t", "7")
273
+ text = text.replace("~", "").replace("`", "")
274
+ text = re.sub(r"(kgs|kg|k|lb|g|gr|pounds|lbs)\b", "", text)
 
 
 
275
  text = re.sub(r"[^\d\.]", "", text)
 
 
276
  if text.count('.') > 1:
277
  parts = text.split('.')
278
  text = parts[0] + '.' + ''.join(parts[1:])
 
 
279
  text = text.strip('.')
280
+ if re.fullmatch(r"^\d*\.?\d{0,3}$", text) and len(text.replace('.', '')) > 0:
 
 
 
281
  try:
282
  weight = float(text)
 
283
  range_score = 1.0
284
+ if 0.1 <= weight <= 250:
285
  range_score = 1.5
286
+ elif weight > 250 and weight <= 500:
287
  range_score = 1.2
288
  elif weight > 500 and weight <= 1000:
289
  range_score = 1.0
290
+ else:
291
  range_score = 0.5
 
292
  digit_count = len(text.replace('.', ''))
293
  digit_score = 1.0
294
+ if digit_count >= 2 and digit_count <= 5:
295
  digit_score = 1.3
296
+ elif digit_count == 1:
297
  digit_score = 0.8
 
298
  score = conf * range_score * digit_score
 
 
299
  if roi_bbox:
300
  (x_roi, y_roi, w_roi, h_roi) = roi_bbox
301
  roi_area = w_roi * h_roi
 
302
  x_min, y_min = int(min(b[0] for b in bbox)), int(min(b[1] for b in bbox))
303
  x_max, y_max = int(max(b[0] for b in bbox)), int(max(b[1] for b in bbox))
304
  bbox_area = (x_max - x_min) * (y_max - y_min)
305
+ if roi_area > 0 and bbox_area / roi_area < 0.03:
306
+ score *= 0.5
 
 
 
307
  bbox_aspect_ratio = (x_max - x_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0
308
+ if bbox_aspect_ratio < 0.2:
309
  score *= 0.7
 
310
  if score > best_score and conf > conf_threshold:
311
  best_weight = text
312
  best_conf = conf
313
  best_score = score
314
  logging.info(f"Candidate EasyOCR weight: '{text}', Conf: {conf}, Score: {score}")
 
315
  except ValueError:
316
  logging.warning(f"Could not convert '{text}' to float during EasyOCR fallback.")
317
  continue
 
320
  logging.info("No valid weight detected after all attempts.")
321
  return "Not detected", 0.0
322
 
 
323
  if "." in best_weight:
324
  int_part, dec_part = best_weight.split(".")
325
+ int_part = int_part.lstrip("0") or "0"
326
+ dec_part = dec_part.rstrip('0')
327
+ if not dec_part and int_part != "0":
 
328
  best_weight = int_part
329
+ elif not dec_part and int_part == "0":
330
  best_weight = "0"
331
  else:
332
  best_weight = f"{int_part}.{dec_part}"
333
  else:
334
+ best_weight = best_weight.lstrip('0') or "0"
335
 
 
336
  try:
337
  final_float_weight = float(best_weight)
338
+ if final_float_weight < 0.01 or final_float_weight > 1000:
339
  logging.warning(f"Detected weight {final_float_weight} is outside typical range, reducing confidence.")
340
+ best_conf *= 0.5
341
  except ValueError:
342
+ pass
343
 
344
  logging.info(f"Final detected weight: {best_weight}, Confidence: {round(best_conf * 100, 2)}%")
345
  return best_weight, round(best_conf * 100, 2)