Sanjayraju30 commited on
Commit
c7e59f2
·
verified ·
1 Parent(s): 781a117

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +202 -75
ocr_engine.py CHANGED
@@ -1,8 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import easyocr
2
  import numpy as np
3
  import cv2
4
  import re
5
  import logging
 
 
6
 
7
  # Set up logging for debugging
8
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -12,6 +38,21 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
12
  # gpu=True can speed up processing if a compatible GPU is available.
13
  easyocr_reader = easyocr.Reader(['en'], gpu=False)
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def estimate_brightness(img):
16
  """Estimate image brightness to detect illuminated displays"""
17
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
@@ -20,43 +61,69 @@ def estimate_brightness(img):
20
  def detect_roi(img):
21
  """Detect and crop the region of interest (likely the digital display)"""
22
  try:
 
23
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
 
24
  brightness = estimate_brightness(img)
25
 
26
  # Adaptive thresholding based on brightness
27
  # For darker images, a lower threshold might be needed.
28
  # For very bright images, a higher threshold.
29
- thresh_value = 230 if brightness > 180 else (190 if brightness > 100 else 150)
 
 
 
 
 
 
 
30
  _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
 
31
 
32
  # Increased kernel size for dilation to better connect segments of digits
33
- kernel = np.ones((11, 11), np.uint8)
34
- dilated = cv2.dilate(thresh, kernel, iterations=4) # Increased iterations
 
 
35
 
36
  contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
37
 
38
  if contours:
39
- # Filter contours by a more robust area range
40
- valid_contours = [c for c in contours if 1000 < cv2.contourArea(c) < (img.shape[0] * img.shape[1] * 0.8)] # Added max area limit
41
-
 
 
 
 
 
 
 
 
 
 
42
  if valid_contours:
43
  # Sort by area descending and iterate
44
  for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
45
  x, y, w, h = cv2.boundingRect(contour)
46
- aspect_ratio = w / h
47
 
48
- # Tighter aspect ratio and size constraints for typical digital displays
49
- if 1.8 <= aspect_ratio <= 5.0 and w > 80 and h > 40: # Adjusted min w and h
50
- # Expand ROI to ensure full digits are captured
51
- padding = 30 # Increased padding
52
- x, y = max(0, x - padding), max(0, y - padding)
53
- w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
54
- return img[y:y+h, x:x+w], (x, y, w, h)
 
 
55
 
56
- logging.info("No suitable ROI found, returning original image.")
 
57
  return img, None
58
  except Exception as e:
59
  logging.error(f"ROI detection failed: {str(e)}")
 
60
  return img, None
61
 
62
  def detect_segments(digit_img):
@@ -66,15 +133,15 @@ def detect_segments(digit_img):
66
  return None
67
 
68
  # Define segment regions (top, middle, bottom, left-top, left-bottom, right-top, right-bottom)
69
- # Adjusted segment proportions for better robustness
70
  segments = {
71
- 'top': (int(w*0.1), int(w*0.9), 0, int(h*0.2)),
72
- 'middle': (int(w*0.1), int(w*0.9), int(h*0.4), int(h*0.6)),
73
- 'bottom': (int(w*0.1), int(w*0.9), int(h*0.8), h),
74
- 'left_top': (0, int(w*0.2), int(h*0.05), int(h*0.5)),
75
- 'left_bottom': (0, int(w*0.2), int(h*0.5), int(h*0.95)),
76
- 'right_top': (int(w*0.8), w, int(h*0.05), int(h*0.5)),
77
- 'right_bottom': (int(w*0.8), w, int(h*0.5), int(h*0.95))
78
  }
79
 
80
  segment_presence = {}
@@ -93,8 +160,8 @@ def detect_segments(digit_img):
93
  total_pixels = region.size
94
 
95
  # Segment is present if a significant portion of the region is white
96
- # Adjusted threshold for segment presence
97
- segment_presence[name] = pixel_count / total_pixels > 0.4 # Increased sensitivity
98
 
99
  # Seven-segment digit patterns - remain the same
100
  digit_patterns = {
@@ -133,13 +200,16 @@ def detect_segments(digit_img):
133
  # Tie-breaking: prefer digits with fewer "extra" segments when scores are equal
134
  current_digit_non_matches = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
135
  best_digit_pattern = digit_patterns[best_match]
136
- best_digit_non_matches = sum(1 for segment in segment_presence if segment not in best_digit_pattern and segment_presence[segment])
137
  if current_digit_non_matches < best_digit_non_matches:
138
  best_match = digit
 
 
 
 
139
 
140
  return best_match
141
 
142
-
143
  def custom_seven_segment_ocr(img, roi_bbox):
144
  """Perform custom OCR for seven-segment displays"""
145
  try:
@@ -151,15 +221,17 @@ def custom_seven_segment_ocr(img, roi_bbox):
151
  if brightness > 150:
152
  _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
153
  else:
154
- _, thresh = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY) # Adjust threshold for darker displays
155
-
 
156
  # Use EasyOCR to get bounding boxes for digits
157
  # Increased text_threshold for more confident digit detection
158
  # Adjusted mag_ratio for better handling of digit sizes
 
159
  results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
160
  contrast_ths=0.2, adjust_contrast=0.8, # Slightly more contrast adjustment
161
- text_threshold=0.85, mag_ratio=1.2, # Reduced mag_ratio for potentially closer digits
162
- allowlist='0123456789.')
163
 
164
  if not results:
165
  logging.info("EasyOCR found no digits for custom seven-segment OCR.")
@@ -169,8 +241,10 @@ def custom_seven_segment_ocr(img, roi_bbox):
169
  digits_info = []
170
  for (bbox, text, conf) in results:
171
  # Ensure the text found by EasyOCR is a single digit or a decimal point
172
- if len(text) == 1 and (text.isdigit() or text == '.'):
173
- (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
 
 
174
  x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
175
  y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
176
  digits_info.append((x_min, x_max, y_min, y_max, text, conf))
@@ -179,7 +253,7 @@ def custom_seven_segment_ocr(img, roi_bbox):
179
  digits_info.sort(key=lambda x: x[0])
180
 
181
  recognized_text = ""
182
- for x_min, x_max, y_min, y_max, easyocr_char, easyocr_conf in digits_info:
183
  x_min, y_min = max(0, x_min), max(0, y_min)
184
  x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
185
 
@@ -187,9 +261,11 @@ def custom_seven_segment_ocr(img, roi_bbox):
187
  continue
188
 
189
  digit_img_crop = thresh[y_min:y_max, x_min:x_max]
190
-
 
191
  # If EasyOCR is very confident about a digit or it's a decimal, use its result directly
192
- if easyocr_conf > 0.95 or easyocr_char == '.':
 
193
  recognized_text += easyocr_char
194
  else:
195
  # Otherwise, try the segment detection
@@ -208,9 +284,21 @@ def custom_seven_segment_ocr(img, roi_bbox):
208
  if text.count('.') > 1:
209
  text = text.replace('.', '', text.count('.') - 1) # Remove extra decimal points
210
 
211
- # Basic validation for common weight formats
212
- if re.fullmatch(r"^\d+(\.\d+)?$", text) and len(text) > 0: # Ensures it starts with digit and has optional decimal
 
 
 
 
 
 
 
 
 
 
 
213
  return text
 
214
  return None
215
  except Exception as e:
216
  logging.error(f"Custom seven-segment OCR failed: {str(e)}")
@@ -223,14 +311,11 @@ def extract_weight_from_image(pil_img):
223
 
224
  brightness = estimate_brightness(img)
225
  # Adjust confidence threshold more dynamically
226
- conf_threshold = 0.9 if brightness > 150 else (0.75 if brightness > 80 else 0.6)
227
 
228
  # Detect ROI
229
  roi_img, roi_bbox = detect_roi(img)
230
 
231
- # Convert ROI to RGB for display purposes if needed later
232
- # roi_img_rgb = cv2.cvtColor(roi_img, cv2.COLOR_BGR2RGB) # For debugging or display
233
-
234
  # Try custom seven-segment OCR first
235
  custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
236
  if custom_result:
@@ -238,22 +323,29 @@ def extract_weight_from_image(pil_img):
238
  if "." in custom_result:
239
  int_part, dec_part = custom_result.split(".")
240
  int_part = int_part.lstrip("0") or "0"
241
- custom_result = f"{int_part}.{dec_part.rstrip('0')}"
 
 
 
 
 
 
242
  else:
243
  custom_result = custom_result.lstrip('0') or "0"
244
 
245
- # Additional validation for custom result
246
- if custom_result == "0." or custom_result == ".": # Handle cases like "0." or just "."
247
- return "Not detected", 0.0
248
-
249
- logging.info(f"Custom OCR result: {custom_result}, Confidence: 100.0%")
250
- return custom_result, 100.0 # High confidence for custom OCR
 
 
251
 
252
  # Fallback to EasyOCR if custom OCR fails
253
- logging.info("Custom OCR failed, falling back to general EasyOCR.")
254
 
255
  # Apply more aggressive image processing for EasyOCR if custom OCR failed
256
- # This could involve different thresholds or contrast adjustments
257
  processed_roi_img_gray = cv2.cvtColor(roi_img, cv2.COLOR_BGR2GRAY)
258
 
259
  # Sharpening
@@ -261,19 +353,22 @@ def extract_weight_from_image(pil_img):
261
  [-1,9,-1],
262
  [-1,-1,-1]])
263
  sharpened_roi = cv2.filter2D(processed_roi_img_gray, -1, kernel_sharpening)
 
264
 
265
  # Apply adaptive thresholding to the sharpened image for better digit isolation
 
266
  processed_roi_img_final = cv2.adaptiveThreshold(sharpened_roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
267
- cv2.THRESH_BINARY, 11, 2)
 
268
 
269
  # EasyOCR parameters for general text
270
  # Adjusted parameters for better digit recognition
271
  # added batch_size for potentially better performance on multiple texts
272
  results = easyocr_reader.readtext(processed_roi_img_final, detail=1, paragraph=False,
273
  contrast_ths=0.3, adjust_contrast=0.9,
274
- text_threshold=0.7, mag_ratio=1.8, # Increased mag_ratio for potentially larger digits
275
- allowlist='0123456789.', batch_size=4) # Added batch_size
276
-
277
  best_weight = None
278
  best_conf = 0.0
279
  best_score = 0.0
@@ -282,18 +377,21 @@ def extract_weight_from_image(pil_img):
282
  text = text.lower().strip()
283
 
284
  # More robust character replacements
285
- text = text.replace(",", ".").replace(";", ".").replace(":", ".")
286
- text = text.replace("o", "0").replace("O", "0").replace("q", "0") # 'q' can look like 0
287
  text = text.replace("s", "5").replace("S", "5")
288
- text = text.replace("g", "9").replace("G", "6") # Be careful with G to 6 conversion
289
- text = text.replace("l", "1").replace("I", "1").replace("|", "1") # Added | to 1
290
  text = text.replace("b", "8").replace("B", "8")
291
  text = text.replace("z", "2").replace("Z", "2")
292
- text = text.replace("a", "4").replace("A", "4") # 'a' can look like 4
293
- text = text.replace("e", "3") # 'e' can look like 3
 
 
 
294
 
295
  # Remove common weight units and other non-numeric characters
296
- text = re.sub(r"(kgs|kg|k|lb|g|gr|pounds)\b", "", text) # Use word boundary \b
297
  text = re.sub(r"[^\d\.]", "", text)
298
 
299
  # Handle multiple decimal points (keep only the first one)
@@ -301,41 +399,59 @@ def extract_weight_from_image(pil_img):
301
  parts = text.split('.')
302
  text = parts[0] + '.' + ''.join(parts[1:])
303
 
 
 
 
304
  # Validate the final text format
305
- if re.fullmatch(r"^\d{1,4}(\.\d{0,3})?$", text): # Adjusted regex for more flexible digits
 
306
  try:
307
  weight = float(text)
308
  # Refined scoring for weights within a reasonable range
309
  range_score = 1.0
310
- if 0.01 <= weight <= 300: # Typical personal scale range
 
 
311
  range_score = 1.2
312
- elif weight > 300 and weight <= 1000: # Larger scales
313
- range_score = 1.1
314
  else: # Very small or very large weights
315
- range_score = 0.8
316
 
317
  digit_count = len(text.replace('.', ''))
318
  digit_score = 1.0
319
- if digit_count >= 3 and digit_count <= 5: # Prefer weights with 3-5 digits (e.g., 50.5, 123.4)
320
  digit_score = 1.3
 
 
321
 
322
  score = conf * range_score * digit_score
323
 
324
  # Also consider area of the bounding box relative to ROI for confidence
325
- bbox_area = (bbox[1][0] - bbox[0][0]) * (bbox[2][1] - bbox[1][1])
326
  if roi_bbox:
327
- roi_area = roi_bbox[2] * roi_bbox[3]
328
- if roi_area > 0 and bbox_area / roi_area < 0.05: # Small bounding boxes might be noise
 
 
 
 
 
 
329
  score *= 0.5
 
 
 
 
 
330
 
331
  if score > best_score and conf > conf_threshold:
332
  best_weight = text
333
  best_conf = conf
334
  best_score = score
335
- logging.info(f"Candidate EasyOCR weight: {text}, Conf: {conf}, Score: {score}")
336
 
337
  except ValueError:
338
- logging.warning(f"Could not convert '{text}' to float.")
339
  continue
340
 
341
  if not best_weight:
@@ -347,6 +463,7 @@ def extract_weight_from_image(pil_img):
347
  int_part, dec_part = best_weight.split(".")
348
  int_part = int_part.lstrip("0") or "0" # Remove leading zeros, keep "0" for 0.x
349
  dec_part = dec_part.rstrip('0') # Remove trailing zeros after decimal
 
350
  if not dec_part and int_part != "0": # If decimal part is empty (e.g., "50."), remove the dot
351
  best_weight = int_part
352
  elif not dec_part and int_part == "0": # if it's "0." keep it as "0"
@@ -356,9 +473,19 @@ def extract_weight_from_image(pil_img):
356
  else:
357
  best_weight = best_weight.lstrip('0') or "0" # Remove leading zeros, keep "0"
358
 
 
 
 
 
 
 
 
 
 
359
  logging.info(f"Final detected weight: {best_weight}, Confidence: {round(best_conf * 100, 2)}%")
360
  return best_weight, round(best_conf * 100, 2)
361
 
362
  except Exception as e:
363
- logging.error(f"Weight extraction failed: {str(e)}")
364
- return "Not detected", 0.0
 
 
1
+ You've indicated that the previous changes didn't resolve the issue, and the application is still failing to detect the weight from the image. To help debug and improve the accuracy, I'll provide the full `ocr_engine.py` code again, but this time with a few more refinements and **added visual debugging steps**. These debugging steps will save intermediate images, which can be very helpful in understanding where the detection process might be going wrong.
2
+
3
+ **Important Steps to Use This Debugging Version:**
4
+
5
+ 1. **Replace `ocr_engine.py`:** Completely replace the content of your `ocr_engine.py` file with the code provided below.
6
+ 2. **Create a Debug Folder:** Before running your application, create a folder named `debug_images` in the same directory as your `app.py` and `ocr_engine.py` files. This is where the intermediate images will be saved.
7
+ 3. **Run Your App:** Execute your `app.py` as usual.
8
+ 4. **Upload Image and Check Debug Folder:** Upload the `Screenshot 2025-06-16 154525.png` image (or any other problematic image). After processing, check the `debug_images` folder. You should find several images showing:
9
+ * The original image.
10
+ * The grayscale version.
11
+ * The thresholded image used for ROI detection.
12
+ * The dilated image used for ROI detection.
13
+ * The detected ROI (cropped image).
14
+ * Thresholded image of the ROI used for digit detection.
15
+ * Individual digit images detected by EasyOCR.
16
+ * Sharpened and adaptively thresholded images used for general EasyOCR fallback.
17
+
18
+ By examining these images, we can pinpoint at which stage the OCR process is failing (e.g., if the ROI isn't detected correctly, if digits aren't isolated well, or if segments aren't properly recognized).
19
+
20
+ ---
21
+
22
+ Here is the **full updated code for `ocr_engine.py`** with enhanced logic and visual debugging:
23
+
24
+ ```python
25
  import easyocr
26
  import numpy as np
27
  import cv2
28
  import re
29
  import logging
30
+ from datetime import datetime
31
+ import os
32
 
33
  # Set up logging for debugging
34
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
38
  # gpu=True can speed up processing if a compatible GPU is available.
39
  easyocr_reader = easyocr.Reader(['en'], gpu=False)
40
 
41
+ # Directory for debug images
42
+ DEBUG_DIR = "debug_images"
43
+ os.makedirs(DEBUG_DIR, exist_ok=True)
44
+
45
+ def save_debug_image(img, filename_suffix, prefix=""):
46
+ """Saves an image to the debug directory with a timestamp."""
47
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
48
+ filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
49
+ if len(img.shape) == 3: # Color image
50
+ cv2.imwrite(filename, img)
51
+ else: # Grayscale image
52
+ cv2.imwrite(filename, img)
53
+ logging.info(f"Saved debug image: {filename}")
54
+
55
+
56
  def estimate_brightness(img):
57
  """Estimate image brightness to detect illuminated displays"""
58
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
61
  def detect_roi(img):
62
  """Detect and crop the region of interest (likely the digital display)"""
63
  try:
64
+ save_debug_image(img, "01_original")
65
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
66
+ save_debug_image(gray, "02_grayscale")
67
+
68
  brightness = estimate_brightness(img)
69
 
70
  # Adaptive thresholding based on brightness
71
  # For darker images, a lower threshold might be needed.
72
  # For very bright images, a higher threshold.
73
+ # Tuned thresholds based on observed values
74
+ if brightness > 180:
75
+ thresh_value = 230
76
+ elif brightness > 100:
77
+ thresh_value = 190
78
+ else:
79
+ thresh_value = 150 # Even lower for very dark images
80
+
81
  _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
82
+ save_debug_image(thresh, f"03_roi_threshold_{thresh_value}")
83
 
84
  # Increased kernel size for dilation to better connect segments of digits
85
+ # This helps in forming a solid contour for the display
86
+ kernel = np.ones((13, 13), np.uint8) # Slightly larger kernel
87
+ dilated = cv2.dilate(thresh, kernel, iterations=5) # Increased iterations for stronger connection
88
+ save_debug_image(dilated, "04_roi_dilated")
89
 
90
  contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
91
 
92
  if contours:
93
+ # Filter contours by a more robust area range and shape
94
+ img_area = img.shape[0] * img.shape[1]
95
+ valid_contours = []
96
+ for c in contours:
97
+ area = cv2.contourArea(c)
98
+ # Filter out very small and very large contours (e.g., entire image, or noise)
99
+ if 1500 < area < (img_area * 0.9): # Increased min area, max area
100
+ x, y, w, h = cv2.boundingRect(c)
101
+ aspect_ratio = w / h
102
+ # Check for typical display aspect ratios and minimum size
103
+ if 2.0 <= aspect_ratio <= 5.5 and w > 100 and h > 50: # Adjusted aspect ratio and min size
104
+ valid_contours.append(c)
105
+
106
  if valid_contours:
107
  # Sort by area descending and iterate
108
  for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
109
  x, y, w, h = cv2.boundingRect(contour)
 
110
 
111
+ # Expand ROI to ensure full digits are captured and a small border
112
+ padding = 40 # Increased padding
113
+ x, y = max(0, x - padding), max(0, y - padding)
114
+ w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
115
+
116
+ roi_img = img[y:y+h, x:x+w]
117
+ save_debug_image(roi_img, "05_detected_roi")
118
+ logging.info(f"Detected ROI with dimensions: ({x}, {y}, {w}, {h})")
119
+ return roi_img, (x, y, w, h)
120
 
121
+ logging.info("No suitable ROI found, returning original image for full image OCR attempt.")
122
+ save_debug_image(img, "05_no_roi_original_fallback")
123
  return img, None
124
  except Exception as e:
125
  logging.error(f"ROI detection failed: {str(e)}")
126
+ save_debug_image(img, "05_roi_detection_error_fallback")
127
  return img, None
128
 
129
  def detect_segments(digit_img):
 
133
  return None
134
 
135
  # Define segment regions (top, middle, bottom, left-top, left-bottom, right-top, right-bottom)
136
+ # Adjusted segment proportions for better robustness, more aggressive cropping
137
  segments = {
138
+ 'top': (int(w*0.15), int(w*0.85), 0, int(h*0.2)),
139
+ 'middle': (int(w*0.15), int(w*0.85), int(h*0.4), int(h*0.6)),
140
+ 'bottom': (int(w*0.15), int(w*0.85), int(h*0.8), h),
141
+ 'left_top': (0, int(w*0.25), int(h*0.05), int(h*0.5)),
142
+ 'left_bottom': (0, int(w*0.25), int(h*0.5), int(h*0.95)),
143
+ 'right_top': (int(w*0.75), w, int(h*0.05), int(h*0.5)),
144
+ 'right_bottom': (int(w*0.75), w, int(h*0.5), int(h*0.95))
145
  }
146
 
147
  segment_presence = {}
 
160
  total_pixels = region.size
161
 
162
  # Segment is present if a significant portion of the region is white
163
+ # Adjusted threshold for segment presence - higher for robustness
164
+ segment_presence[name] = pixel_count / total_pixels > 0.55 # Increased sensitivity further
165
 
166
  # Seven-segment digit patterns - remain the same
167
  digit_patterns = {
 
200
  # Tie-breaking: prefer digits with fewer "extra" segments when scores are equal
201
  current_digit_non_matches = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
202
  best_digit_pattern = digit_patterns[best_match]
203
+ best_digit_non_matches = sum(1 for segment in segment_presence if segment not in best_digit_pattern and segment_presence[best_digit_pattern]) # Corrected logic
204
  if current_digit_non_matches < best_digit_non_matches:
205
  best_match = digit
206
+
207
+ # Debugging segment presence
208
+ # logging.debug(f"Digit Image Shape: {digit_img.shape}, Segments: {segment_presence}, Best Match: {best_match}")
209
+ # save_debug_image(digit_img, f"digit_segment_debug_{best_match or 'none'}", prefix="10_")
210
 
211
  return best_match
212
 
 
213
  def custom_seven_segment_ocr(img, roi_bbox):
214
  """Perform custom OCR for seven-segment displays"""
215
  try:
 
221
  if brightness > 150:
222
  _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
223
  else:
224
+ _, thresh = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY) # Lower threshold for darker displays
225
+ save_debug_image(thresh, "06_roi_thresh_for_digits")
226
+
227
  # Use EasyOCR to get bounding boxes for digits
228
  # Increased text_threshold for more confident digit detection
229
  # Adjusted mag_ratio for better handling of digit sizes
230
+ # Added y_ths to reduce sensitivity to vertical position variations (common in scales)
231
  results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
232
  contrast_ths=0.2, adjust_contrast=0.8, # Slightly more contrast adjustment
233
+ text_threshold=0.85, mag_ratio=1.5, # Adjusted mag_ratio back, seems to work better for 7-seg
234
+ allowlist='0123456789.', y_ths=0.2) # Increased y_ths for row grouping tolerance
235
 
236
  if not results:
237
  logging.info("EasyOCR found no digits for custom seven-segment OCR.")
 
241
  digits_info = []
242
  for (bbox, text, conf) in results:
243
  # Ensure the text found by EasyOCR is a single digit or a decimal point
244
+ # Also filter by a minimum height of the bounding box for robustness
245
+ (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
246
+ h_bbox = max(y1,y2,y3,y4) - min(y1,y2,y3,y4)
247
+ if len(text) == 1 and (text.isdigit() or text == '.') and h_bbox > 10: # Min height for bbox
248
  x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
249
  y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
250
  digits_info.append((x_min, x_max, y_min, y_max, text, conf))
 
253
  digits_info.sort(key=lambda x: x[0])
254
 
255
  recognized_text = ""
256
+ for idx, (x_min, x_max, y_min, y_max, easyocr_char, easyocr_conf) in enumerate(digits_info):
257
  x_min, y_min = max(0, x_min), max(0, y_min)
258
  x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
259
 
 
261
  continue
262
 
263
  digit_img_crop = thresh[y_min:y_max, x_min:x_max]
264
+ save_debug_image(digit_img_crop, f"07_digit_crop_{idx}_{easyocr_char}")
265
+
266
  # If EasyOCR is very confident about a digit or it's a decimal, use its result directly
267
+ # Or if the digit crop is too small for reliable segment detection
268
+ if easyocr_conf > 0.9 or easyocr_char == '.' or digit_img_crop.shape[0] < 20 or digit_img_crop.shape[1] < 15: # Lowered confidence for direct use
269
  recognized_text += easyocr_char
270
  else:
271
  # Otherwise, try the segment detection
 
284
  if text.count('.') > 1:
285
  text = text.replace('.', '', text.count('.') - 1) # Remove extra decimal points
286
 
287
+ # Basic validation for common weight formats (e.g., 75.5, 120.0, 5.0)
288
+ # Allow numbers to start with . (e.g., .5 -> 0.5) if it's the only character
289
+ if text and re.fullmatch(r"^\d*\.?\d*$", text) and len(text.replace('.', '')) > 0:
290
+ # Handle cases like ".5" -> "0.5"
291
+ if text.startswith('.') and len(text) > 1:
292
+ text = "0" + text
293
+ # Handle cases like "5." -> "5"
294
+ if text.endswith('.') and len(text) > 1:
295
+ text = text.rstrip('.')
296
+
297
+ # Ensure it's not just a single dot or empty after processing
298
+ if text == '.' or text == '':
299
+ return None
300
  return text
301
+ logging.info(f"Custom OCR final text '{recognized_text}' failed validation.")
302
  return None
303
  except Exception as e:
304
  logging.error(f"Custom seven-segment OCR failed: {str(e)}")
 
311
 
312
  brightness = estimate_brightness(img)
313
  # Adjust confidence threshold more dynamically
314
+ conf_threshold = 0.9 if brightness > 150 else (0.8 if brightness > 80 else 0.7) # Adjusted thresholds
315
 
316
  # Detect ROI
317
  roi_img, roi_bbox = detect_roi(img)
318
 
 
 
 
319
  # Try custom seven-segment OCR first
320
  custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
321
  if custom_result:
 
323
  if "." in custom_result:
324
  int_part, dec_part = custom_result.split(".")
325
  int_part = int_part.lstrip("0") or "0"
326
+ dec_part = dec_part.rstrip('0')
327
+ if not dec_part and int_part != "0": # If decimal part is empty (e.g., "50."), remove the dot
328
+ custom_result = int_part
329
+ elif not dec_part and int_part == "0": # if it's "0." keep it as "0"
330
+ custom_result = "0"
331
+ else:
332
+ custom_result = f"{int_part}.{dec_part}"
333
  else:
334
  custom_result = custom_result.lstrip('0') or "0"
335
 
336
+ # Additional validation for custom result to ensure it's a valid number
337
+ try:
338
+ float(custom_result)
339
+ logging.info(f"Custom OCR result: {custom_result}, Confidence: 100.0%")
340
+ return custom_result, 100.0 # High confidence for custom OCR
341
+ except ValueError:
342
+ logging.warning(f"Custom OCR result '{custom_result}' is not a valid number, falling back.")
343
+ custom_result = None # Force fallback
344
 
345
  # Fallback to EasyOCR if custom OCR fails
346
+ logging.info("Custom OCR failed or invalid, falling back to general EasyOCR.")
347
 
348
  # Apply more aggressive image processing for EasyOCR if custom OCR failed
 
349
  processed_roi_img_gray = cv2.cvtColor(roi_img, cv2.COLOR_BGR2GRAY)
350
 
351
  # Sharpening
 
353
  [-1,9,-1],
354
  [-1,-1,-1]])
355
  sharpened_roi = cv2.filter2D(processed_roi_img_gray, -1, kernel_sharpening)
356
+ save_debug_image(sharpened_roi, "08_fallback_sharpened")
357
 
358
  # Apply adaptive thresholding to the sharpened image for better digit isolation
359
+ # Block size and C constant can be critical
360
  processed_roi_img_final = cv2.adaptiveThreshold(sharpened_roi, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
361
+ cv2.THRESH_BINARY, 15, 3) # Adjusted block size and C
362
+ save_debug_image(processed_roi_img_final, "09_fallback_adaptive_thresh")
363
 
364
  # EasyOCR parameters for general text
365
  # Adjusted parameters for better digit recognition
366
  # added batch_size for potentially better performance on multiple texts
367
  results = easyocr_reader.readtext(processed_roi_img_final, detail=1, paragraph=False,
368
  contrast_ths=0.3, adjust_contrast=0.9,
369
+ text_threshold=0.6, mag_ratio=1.8, # Lowered text_threshold, increased mag_ratio
370
+ allowlist='0123456789.', batch_size=4, y_ths=0.3) # Increased y_ths
371
+
372
  best_weight = None
373
  best_conf = 0.0
374
  best_score = 0.0
 
377
  text = text.lower().strip()
378
 
379
  # More robust character replacements
380
+ text = text.replace(",", ".").replace(";", ".").replace(":", ".").replace(" ", "") # Remove spaces
381
+ text = text.replace("o", "0").replace("O", "0").replace("q", "0").replace("Q", "0")
382
  text = text.replace("s", "5").replace("S", "5")
383
+ text = text.replace("g", "9").replace("G", "6")
384
+ text = text.replace("l", "1").replace("I", "1").replace("|", "1")
385
  text = text.replace("b", "8").replace("B", "8")
386
  text = text.replace("z", "2").replace("Z", "2")
387
+ text = text.replace("a", "4").replace("A", "4")
388
+ text = text.replace("e", "3")
389
+ text = text.replace("t", "7") # 't' can look like '7'
390
+ text = text.replace("~", "") # Common noise
391
+ text = text.replace("`", "")
392
 
393
  # Remove common weight units and other non-numeric characters
394
+ text = re.sub(r"(kgs|kg|k|lb|g|gr|pounds|lbs)\b", "", text) # Added lbs
395
  text = re.sub(r"[^\d\.]", "", text)
396
 
397
  # Handle multiple decimal points (keep only the first one)
 
399
  parts = text.split('.')
400
  text = parts[0] + '.' + ''.join(parts[1:])
401
 
402
+ # Clean up leading/trailing dots if any
403
+ text = text.strip('.')
404
+
405
  # Validate the final text format
406
+ # Allow optional leading zero, and optional decimal with up to 3 places
407
+ if re.fullmatch(r"^\d*\.?\d{0,3}$", text) and len(text.replace('.', '')) > 0: # Ensure at least one digit
408
  try:
409
  weight = float(text)
410
  # Refined scoring for weights within a reasonable range
411
  range_score = 1.0
412
+ if 0.1 <= weight <= 250: # Very common personal scale range
413
+ range_score = 1.5
414
+ elif weight > 250 and weight <= 500: # Larger weights
415
  range_score = 1.2
416
+ elif weight > 500 and weight <= 1000:
417
+ range_score = 1.0
418
  else: # Very small or very large weights
419
+ range_score = 0.5
420
 
421
  digit_count = len(text.replace('.', ''))
422
  digit_score = 1.0
423
+ if digit_count >= 2 and digit_count <= 5: # Prefer weights with 2-5 digits (e.g., 5.0, 75.5, 123.4)
424
  digit_score = 1.3
425
+ elif digit_count == 1: # Single digit weights less common but possible
426
+ digit_score = 0.8
427
 
428
  score = conf * range_score * digit_score
429
 
430
  # Also consider area of the bounding box relative to ROI for confidence
 
431
  if roi_bbox:
432
+ (x_roi, y_roi, w_roi, h_roi) = roi_bbox
433
+ roi_area = w_roi * h_roi
434
+ # Calculate bbox area accurately
435
+ x_min, y_min = int(min(b[0] for b in bbox)), int(min(b[1] for b in bbox))
436
+ x_max, y_max = int(max(b[0] for b in bbox)), int(max(b[1] for b in bbox))
437
+ bbox_area = (x_max - x_min) * (y_max - y_min)
438
+
439
+ if roi_area > 0 and bbox_area / roi_area < 0.03: # Very small bounding boxes might be noise
440
  score *= 0.5
441
+
442
+ # Penalize if bbox is too narrow (e.g., single line detected as digit)
443
+ bbox_aspect_ratio = (x_max - x_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0
444
+ if bbox_aspect_ratio < 0.2: # Very thin bounding boxes
445
+ score *= 0.7
446
 
447
  if score > best_score and conf > conf_threshold:
448
  best_weight = text
449
  best_conf = conf
450
  best_score = score
451
+ logging.info(f"Candidate EasyOCR weight: '{text}', Conf: {conf}, Score: {score}")
452
 
453
  except ValueError:
454
+ logging.warning(f"Could not convert '{text}' to float during EasyOCR fallback.")
455
  continue
456
 
457
  if not best_weight:
 
463
  int_part, dec_part = best_weight.split(".")
464
  int_part = int_part.lstrip("0") or "0" # Remove leading zeros, keep "0" for 0.x
465
  dec_part = dec_part.rstrip('0') # Remove trailing zeros after decimal
466
+
467
  if not dec_part and int_part != "0": # If decimal part is empty (e.g., "50."), remove the dot
468
  best_weight = int_part
469
  elif not dec_part and int_part == "0": # if it's "0." keep it as "0"
 
473
  else:
474
  best_weight = best_weight.lstrip('0') or "0" # Remove leading zeros, keep "0"
475
 
476
+ # Final check for extremely unlikely weights (e.g., 0.0001, 9999)
477
+ try:
478
+ final_float_weight = float(best_weight)
479
+ if final_float_weight < 0.01 or final_float_weight > 1000: # Adjust this range if needed
480
+ logging.warning(f"Detected weight {final_float_weight} is outside typical range, reducing confidence.")
481
+ best_conf *= 0.5 # Reduce confidence for out-of-range values
482
+ except ValueError:
483
+ pass # Should not happen if previous parsing worked
484
+
485
  logging.info(f"Final detected weight: {best_weight}, Confidence: {round(best_conf * 100, 2)}%")
486
  return best_weight, round(best_conf * 100, 2)
487
 
488
  except Exception as e:
489
+ logging.error(f"Weight extraction failed unexpectedly: {str(e)}")
490
+ return "Not detected", 0.0
491
+ ```