Sanjayraju30 commited on
Commit
753fcb8
·
verified ·
1 Parent(s): 204176c

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +131 -335
ocr_engine.py CHANGED
@@ -6,12 +6,12 @@ import logging
6
  from datetime import datetime
7
  import os
8
  from PIL import Image, ImageEnhance
9
- from scipy.signal import convolve2d
10
 
11
  # Set up logging for detailed debugging
12
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
13
 
14
- # Initialize EasyOCR with English (enable GPU if available)
15
  easyocr_reader = easyocr.Reader(['en'], gpu=False)
16
 
17
  # Directory for debug images
@@ -36,381 +36,177 @@ def estimate_brightness(img):
36
  return brightness
37
 
38
  def deblur_image(img):
39
- """Apply deconvolution to reduce blur (approximate Wiener filter)"""
40
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
41
- # Create a simple point spread function (PSF) for deblurring
42
- psf = np.ones((5, 5)) / 25
43
- # Normalize image to float32
44
- img_float = gray.astype(np.float32) / 255.0
45
- # Convolve with PSF (simulate blur)
46
- img_blurred = convolve2d(img_float, psf, mode='same')
47
- # Avoid division by zero
48
- img_blurred = np.where(img_blurred == 0, 1e-10, img_blurred)
49
- # Deconvolve
50
- img_deblurred = img_float / img_blurred
51
- img_deblurred = np.clip(img_deblurred * 255, 0, 255).astype(np.uint8)
52
- save_debug_image(img_deblurred, "00_deblurred")
53
- return img_deblurred
54
 
55
  def preprocess_image(img):
56
- """Enhance contrast, brightness, reduce noise, and deblur for digit detection"""
57
- # Deblur first
58
- deblurred = deblur_image(img)
59
-
60
- # Convert to PIL for enhancement
61
- pil_img = Image.fromarray(deblurred)
62
- pil_img = ImageEnhance.Contrast(pil_img).enhance(2.5) # Aggressive contrast
63
- pil_img = ImageEnhance.Brightness(pil_img).enhance(1.5) # Stronger brightness
64
- img_enhanced = np.array(pil_img)
65
  save_debug_image(img_enhanced, "00_preprocessed_pil")
66
 
67
- # Apply CLAHE for local contrast enhancement
68
- clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
69
- enhanced = clahe.apply(img_enhanced)
 
 
 
70
  save_debug_image(enhanced, "00_clahe_enhanced")
71
 
72
- # Aggressive noise reduction
73
- filtered = cv2.bilateralFilter(enhanced, d=15, sigmaColor=150, sigmaSpace=150)
74
  save_debug_image(filtered, "00_bilateral_filtered")
 
 
 
 
 
75
  return filtered
76
 
77
  def normalize_image(img):
78
- """Resize image to standard dimensions while preserving aspect ratio"""
79
  h, w = img.shape[:2]
80
- target_height = 720
81
  aspect_ratio = w / h
82
  target_width = int(target_height * aspect_ratio)
83
- if target_width < 320:
84
- target_width = 320
85
  target_height = int(target_width / aspect_ratio)
86
  resized = cv2.resize(img, (target_width, target_height), interpolation=cv2.INTER_CUBIC)
87
  save_debug_image(resized, "00_normalized")
88
  logging.debug(f"Normalized image to {target_width}x{target_height}")
89
  return resized
90
 
91
- def detect_roi(img):
92
- """Detect the digital display region, with fallback to full image"""
93
  try:
94
- save_debug_image(img, "01_original")
95
- gray = preprocess_image(img)
96
- save_debug_image(gray, "02_preprocessed_grayscale")
97
-
98
- # Try multiple thresholding methods
99
- brightness = estimate_brightness(img)
100
- if brightness > 120:
101
- thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
102
- cv2.THRESH_BINARY_INV, 41, 7) # Inverted for bright displays
103
- save_debug_image(thresh, "03_roi_adaptive_threshold_high")
104
- else:
105
- _, thresh = cv2.threshold(gray, 20, 255, cv2.THRESH_BINARY_INV) # Low threshold for dim displays
106
- save_debug_image(thresh, "03_roi_simple_threshold_low")
107
-
108
- # Morphological operations to connect digits
109
- kernel = np.ones((7, 7), np.uint8)
110
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
111
- save_debug_image(thresh, "03_roi_morph_cleaned")
112
-
113
- kernel = np.ones((15, 15), np.uint8)
114
- dilated = cv2.dilate(thresh, kernel, iterations=6)
115
- save_debug_image(dilated, "04_roi_dilated")
116
-
117
- contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
118
-
119
- if contours:
120
- img_area = img.shape[0] * img.shape[1]
121
- valid_contours = []
122
- for c in contours:
123
- area = cv2.contourArea(c)
124
- if 100 < area < (img_area * 0.999): # Extremely relaxed area filter
125
- x, y, w, h = cv2.boundingRect(c)
126
- aspect_ratio = w / h if h > 0 else 0
127
- if 0.3 <= aspect_ratio <= 15.0 and w > 20 and h > 10: # Very relaxed filters
128
- valid_contours.append(c)
129
-
130
- if valid_contours:
131
- contour = max(valid_contours, key=cv2.contourArea)
132
- x, y, w, h = cv2.boundingRect(contour)
133
- padding = 120 # Very generous padding
134
- x, y = max(0, x - padding), max(0, y - padding)
135
- w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
136
- roi_img = img[y:y+h, x:x+w]
137
- save_debug_image(roi_img, "05_detected_roi")
138
- logging.info(f"Detected ROI with dimensions: ({x}, {y}, {w}, {h})")
139
- return roi_img, (x, y, w, h)
140
-
141
- logging.info("No suitable ROI found, returning full image.")
142
- save_debug_image(img, "05_no_roi_full_fallback")
143
- return img, None
144
- except Exception as e:
145
- logging.error(f"ROI detection failed: {str(e)}")
146
- save_debug_image(img, "05_roi_detection_error_fallback")
147
- return img, None
148
-
149
- def detect_segments(digit_img):
150
- """Detect seven-segment patterns in a digit image"""
151
- h, w = digit_img.shape
152
- if h < 6 or w < 3: # Extremely relaxed size constraints
153
- logging.debug(f"Digit image too small: {w}x{h}")
154
- return None
155
-
156
- segments = {
157
- 'top': (int(w*0.05), int(w*0.95), 0, int(h*0.3)),
158
- 'middle': (int(w*0.05), int(w*0.95), int(h*0.35), int(h*0.65)),
159
- 'bottom': (int(w*0.05), int(w*0.95), int(h*0.7), h),
160
- 'left_top': (0, int(w*0.35), int(h*0.05), int(h*0.55)),
161
- 'left_bottom': (0, int(w*0.35), int(h*0.45), int(h*0.95)),
162
- 'right_top': (int(w*0.65), w, int(h*0.05), int(h*0.55)),
163
- 'right_bottom': (int(w*0.65), w, int(h*0.45), int(h*0.95))
164
- }
165
-
166
- segment_presence = {}
167
- for name, (x1, x2, y1, y2) in segments.items():
168
- x1, y1 = max(0, x1), max(0, y1)
169
- x2, y2 = min(w, x2), min(h, y2)
170
- region = digit_img[y1:y2, x1:x2]
171
- if region.size == 0:
172
- segment_presence[name] = False
173
- continue
174
- pixel_count = np.sum(region == 255)
175
- total_pixels = region.size
176
- segment_presence[name] = pixel_count / total_pixels > 0.25 # Very low threshold
177
- logging.debug(f"Segment {name}: {pixel_count}/{total_pixels} = {pixel_count/total_pixels:.2f}")
178
-
179
- digit_patterns = {
180
- '0': ('top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
181
- '1': ('right_top', 'right_bottom'),
182
- '2': ('top', 'middle', 'bottom', 'left_bottom', 'right_top'),
183
- '3': ('top', 'middle', 'bottom', 'right_top', 'right_bottom'),
184
- '4': ('middle', 'left_top', 'right_top', 'right_bottom'),
185
- '5': ('top', 'middle', 'bottom', 'left_top', 'right_bottom'),
186
- '6': ('top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_bottom'),
187
- '7': ('top', 'right_top', 'right_bottom'),
188
- '8': ('top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
189
- '9': ('top', 'middle', 'bottom', 'left_top', 'right_top', 'right_bottom')
190
- }
191
-
192
- best_match = None
193
- max_score = -1
194
- for digit, pattern in digit_patterns.items():
195
- matches = sum(1 for segment in pattern if segment_presence.get(segment, False))
196
- non_matches_penalty = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
197
- current_score = matches - non_matches_penalty
198
- if all(segment_presence.get(s, False) for s in pattern):
199
- current_score += 0.5
200
- if current_score > max_score:
201
- max_score = current_score
202
- best_match = digit
203
- elif current_score == max_score and best_match is not None:
204
- current_digit_non_matches = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
205
- best_digit_pattern = digit_patterns[best_match]
206
- best_digit_non_matches = sum(1 for segment in segment_presence if segment not in best_digit_pattern and segment_presence[segment])
207
- if current_digit_non_matches < best_digit_non_matches:
208
- best_match = digit
209
-
210
- logging.debug(f"Segment presence: {segment_presence}, Detected digit: {best_match}")
211
- return best_match
212
-
213
- def custom_seven_segment_ocr(img, roi_bbox):
214
- """Perform custom OCR for seven-segment displays"""
215
- try:
216
- gray = preprocess_image(img)
217
- brightness = estimate_brightness(img)
218
- # Multiple thresholding approaches
219
- if brightness > 120:
220
- _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
221
- save_debug_image(thresh, "06_roi_otsu_threshold")
222
- else:
223
- _, thresh = cv2.threshold(gray, 15, 255, cv2.THRESH_BINARY_INV) # Very low threshold
224
- save_debug_image(thresh, "06_roi_simple_threshold")
225
-
226
- # Morphological cleaning
227
- kernel = np.ones((5, 5), np.uint8)
228
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
229
- save_debug_image(thresh, "06_roi_morph_cleaned")
230
-
231
- results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
232
- contrast_ths=0.05, adjust_contrast=1.2,
233
- text_threshold=0.2, mag_ratio=6.0,
234
- allowlist='0123456789.-', y_ths=0.7)
235
-
236
- logging.info(f"Custom OCR EasyOCR results: {results}")
237
- if not results:
238
- logging.info("Custom OCR EasyOCR found no digits.")
239
- return None
240
-
241
- digits_info = []
242
- for (bbox, text, conf) in results:
243
- (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
244
- h_bbox = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
245
- if len(text) <= 2 and any(c in '0123456789.-' for c in text) and h_bbox > 3:
246
- x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
247
- y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
248
- digits_info.append((x_min, x_max, y_min, y_max, text, conf))
249
-
250
- digits_info.sort(key=lambda x: x[0])
251
- recognized_text = ""
252
- for idx, (x_min, x_max, y_min, y_max, easyocr_char, easyocr_conf) in enumerate(digits_info):
253
- x_min, y_min = max(0, x_min), max(0, y_min)
254
- x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
255
- if x_max <= x_min or y_max <= y_min:
256
- continue
257
- digit_img_crop = thresh[y_min:y_max, x_min:x_max]
258
- save_debug_image(digit_img_crop, f"07_digit_crop_{idx}_{easyocr_char}")
259
- if easyocr_conf > 0.7 or easyocr_char in '.-' or digit_img_crop.shape[0] < 6 or digit_img_crop.shape[1] < 3:
260
- recognized_text += easyocr_char
261
- else:
262
- digit_from_segments = detect_segments(digit_img_crop)
263
- if digit_from_segments:
264
- recognized_text += digit_from_segments
265
- else:
266
- recognized_text += easyocr_char
267
-
268
- logging.info(f"Custom OCR before validation, recognized_text: {recognized_text}")
269
- if recognized_text:
270
- return recognized_text
271
- logging.info(f"Custom OCR text '{recognized_text}' is empty.")
272
- return None
273
  except Exception as e:
274
- logging.error(f"Custom seven-segment OCR failed: {str(e)}")
275
  return None
276
 
277
  def extract_weight_from_image(pil_img):
278
- """Extract weight from a PIL image of a digital scale display"""
279
  try:
280
  img = np.array(pil_img)
281
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
282
  save_debug_image(img, "00_input_image")
283
 
284
- # Normalize image dimensions
285
  img = normalize_image(img)
286
  brightness = estimate_brightness(img)
287
- conf_threshold = 0.2 if brightness > 120 else 0.1
288
 
289
- roi_img, roi_bbox = detect_roi(img)
290
- custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
291
- if custom_result:
292
- logging.info(f"Raw custom OCR result: {custom_result}")
293
- # Minimal cleaning
294
- text = re.sub(r"[^\d\.\-]", "", custom_result) # Allow negative signs
295
- if text.count('.') > 1:
296
- text = text.replace('.', '', text.count('.') - 1)
297
- if text:
298
- if text.startswith('.'):
299
- text = "0" + text
300
- if text.endswith('.'):
301
- text = text.rstrip('.')
302
- if text == '.' or text == '':
303
- logging.warning(f"Custom OCR result '{text}' is invalid after cleaning.")
304
- else:
305
- try:
306
- weight = float(text)
307
- logging.info(f"Custom OCR result: {text}, Confidence: 90.0%")
308
- return text, 90.0
309
- except ValueError:
310
- logging.warning(f"Custom OCR result '{text}' is not a valid number, falling back.")
311
- logging.warning(f"Custom OCR result '{custom_result}' failed cleaning, falling back.")
312
 
313
- logging.info("Custom OCR failed or invalid, falling back to general EasyOCR.")
314
- processed_roi_img = preprocess_image(roi_img)
315
-
316
- # Multiple thresholding approaches
317
- if brightness > 120:
318
- thresh = cv2.adaptiveThreshold(processed_roi_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
319
- cv2.THRESH_BINARY_INV, 51, 9)
320
- save_debug_image(thresh, "09_fallback_adaptive_thresh")
321
  else:
322
- _, thresh = cv2.threshold(processed_roi_img, 15, 255, cv2.THRESH_BINARY_INV)
323
- save_debug_image(thresh, "09_fallback_simple_thresh")
324
 
325
- # Morphological cleaning
326
- kernel = np.ones((5, 5), np.uint8)
327
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
328
- save_debug_image(thresh, "09_fallback_morph_cleaned")
329
 
 
330
  results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
331
- contrast_ths=0.05, adjust_contrast=1.2,
332
- text_threshold=0.1, mag_ratio=7.0,
333
- allowlist='0123456789.-', batch_size=4, y_ths=0.8)
334
-
335
- best_weight = None
336
- best_conf = 0.0
337
- best_score = 0.0
338
- for (bbox, text, conf) in results:
339
- logging.info(f"Fallback EasyOCR raw text: {text}, Confidence: {conf}")
340
- text = text.lower().strip()
341
- text = text.replace(",", ".").replace(";", ".").replace(":", ".").replace(" ", "")
342
- text = text.replace("o", "0").replace("O", "0").replace("q", "0").replace("Q", "0")
343
- text = text.replace("s", "5").replace("S", "5")
344
- text = text.replace("g", "9").replace("G", "6")
345
- text = text.replace("l", "1").replace("I", "1").replace("|", "1")
346
- text = text.replace("b", "8").replace("B", "8")
347
- text = text.replace("z", "2").replace("Z", "2")
348
- text = text.replace("a", "4").replace("A", "4")
349
- text = text.replace("e", "3")
350
- text = text.replace("t", "7")
351
- text = text.replace("~", "").replace("`", "")
352
- text = re.sub(r"(kgs|kg|k|lb|g|gr|pounds|lbs)\b", "", text)
353
- text = re.sub(r"[^\d\.\-]", "", text)
354
- if text.count('.') > 1:
355
- parts = text.split('.')
356
- text = parts[0] + '.' + ''.join(parts[1:])
357
- text = text.strip('.')
358
- if len(text.replace('.', '').replace('-', '')) > 0:
359
- try:
360
- weight = float(text)
361
- range_score = 1.0
362
- if -1000 <= weight <= 1000: # Allow negative weights
363
- range_score = 1.5
364
- elif weight > 1000 and weight <= 2000:
365
- range_score = 1.0
366
- else:
367
- range_score = 0.5
368
- digit_count = len(text.replace('.', '').replace('-', ''))
369
- digit_score = 1.0
370
- if digit_count >= 2 and digit_count <= 6:
371
- digit_score = 1.3
372
- elif digit_count == 1:
373
- digit_score = 0.8
374
- score = conf * range_score * digit_score
375
- if roi_bbox:
376
- (x_roi, y_roi, w_roi, h_roi) = roi_bbox
377
- roi_area = w_roi * h_roi
378
- x_min, y_min = int(min(b[0] for b in bbox)), int(min(b[1] for b in bbox))
379
- x_max, y_max = int(max(b[0] for b in bbox)), int(max(b[1] for b in bbox))
380
- bbox_area = (x_max - x_min) * (y_max - y_min)
381
- if roi_area > 0 and bbox_area / roi_area < 0.01:
382
- score *= 0.5
383
- bbox_aspect_ratio = (x_max - x_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0
384
- if bbox_aspect_ratio < 0.05:
385
- score *= 0.7
386
- if score > best_score and conf > conf_threshold:
387
- best_weight = text
388
- best_conf = conf
389
- best_score = score
390
- logging.info(f"Candidate EasyOCR weight: '{text}', Conf: {conf}, Score: {score}")
391
- except ValueError:
392
- logging.warning(f"Could not convert '{text}' to float during EasyOCR fallback.")
393
- continue
394
 
395
- if not best_weight:
396
- logging.info("No valid weight detected after all attempts.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  return "Not detected", 0.0
398
 
399
- if "." in best_weight:
400
- int_part, dec_part = best_weight.split(".")
401
- int_part = int_part.lstrip("0") or "0"
402
- dec_part = dec_part.rstrip('0')
403
- if not dec_part and int_part != "0":
404
- best_weight = int_part
405
- elif not dec_part and int_part == "0":
406
- best_weight = "0"
 
 
 
 
 
 
 
 
407
  else:
408
- best_weight = f"{int_part}.{dec_part}"
409
- else:
410
- best_weight = best_weight.lstrip('0') or "0"
411
-
412
- logging.info(f"Final detected weight: {best_weight}, Confidence: {round(best_conf * 100, 2)}%")
413
- return best_weight, round(best_conf * 100, 2)
414
 
415
  except Exception as e:
416
  logging.error(f"Weight extraction failed unexpectedly: {str(e)}")
 
6
  from datetime import datetime
7
  import os
8
  from PIL import Image, ImageEnhance
9
+ import pytesseract
10
 
11
  # Set up logging for detailed debugging
12
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
13
 
14
+ # Initialize EasyOCR (enable GPU if available)
15
  easyocr_reader = easyocr.Reader(['en'], gpu=False)
16
 
17
  # Directory for debug images
 
36
  return brightness
37
 
38
  def deblur_image(img):
39
+ """Apply iterative sharpening to reduce blur"""
40
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
41
+ # Multiple sharpening passes
42
+ for _ in range(2):
43
+ kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
44
+ gray = cv2.filter2D(gray, -1, kernel)
45
+ gray = np.clip(gray, 0, 255).astype(np.uint8)
46
+ save_debug_image(gray, "00_deblurred")
47
+ return gray
 
 
 
 
 
 
48
 
49
  def preprocess_image(img):
50
+ """Enhance image for digit detection under adverse conditions"""
51
+ # PIL enhancement
52
+ pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
53
+ pil_img = ImageEnhance.Contrast(pil_img).enhance(3.0) # Extreme contrast
54
+ pil_img = ImageEnhance.Brightness(pil_img).enhance(1.8) # Strong brightness
55
+ img_enhanced = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
 
 
 
56
  save_debug_image(img_enhanced, "00_preprocessed_pil")
57
 
58
+ # Deblur
59
+ deblurred = deblur_image(img_enhanced)
60
+
61
+ # CLAHE for local contrast
62
+ clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
63
+ enhanced = clahe.apply(deblurred)
64
  save_debug_image(enhanced, "00_clahe_enhanced")
65
 
66
+ # Noise reduction
67
+ filtered = cv2.bilateralFilter(enhanced, d=17, sigmaColor=200, sigmaSpace=200)
68
  save_debug_image(filtered, "00_bilateral_filtered")
69
+
70
+ # Morphological cleaning
71
+ kernel = np.ones((5, 5), np.uint8)
72
+ filtered = cv2.morphologyEx(filtered, cv2.MORPH_OPEN, kernel, iterations=2)
73
+ save_debug_image(filtered, "00_morph_cleaned")
74
  return filtered
75
 
76
  def normalize_image(img):
77
+ """Resize image to ensure digits are detectable"""
78
  h, w = img.shape[:2]
79
+ target_height = 1080 # High resolution for small digits
80
  aspect_ratio = w / h
81
  target_width = int(target_height * aspect_ratio)
82
+ if target_width < 480:
83
+ target_width = 480
84
  target_height = int(target_width / aspect_ratio)
85
  resized = cv2.resize(img, (target_width, target_height), interpolation=cv2.INTER_CUBIC)
86
  save_debug_image(resized, "00_normalized")
87
  logging.debug(f"Normalized image to {target_width}x{target_height}")
88
  return resized
89
 
90
+ def tesseract_ocr(img):
91
+ """Fallback OCR using Tesseract"""
92
  try:
93
+ config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.-'
94
+ text = pytesseract.image_to_string(img, config=config).strip()
95
+ logging.info(f"Tesseract OCR raw text: {text}")
96
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  except Exception as e:
98
+ logging.error(f"Tesseract OCR failed: {str(e)}")
99
  return None
100
 
101
  def extract_weight_from_image(pil_img):
102
+ """Extract the actual weight shown in the image"""
103
  try:
104
  img = np.array(pil_img)
105
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
106
  save_debug_image(img, "00_input_image")
107
 
108
+ # Normalize image
109
  img = normalize_image(img)
110
  brightness = estimate_brightness(img)
111
+ conf_threshold = 0.1 # Very low threshold for blurry images
112
 
113
+ # Preprocess entire image (bypass ROI detection)
114
+ processed_img = preprocess_image(img)
115
+ save_debug_image(processed_img, "01_processed_full")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ # Try multiple thresholding approaches
118
+ if brightness > 100:
119
+ thresh = cv2.adaptiveThreshold(processed_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
120
+ cv2.THRESH_BINARY_INV, 61, 11)
121
+ save_debug_image(thresh, "02_adaptive_threshold")
 
 
 
122
  else:
123
+ _, thresh = cv2.threshold(processed_img, 10, 255, cv2.THRESH_BINARY_INV)
124
+ save_debug_image(thresh, "02_simple_threshold")
125
 
126
+ # Morphological operations
127
+ kernel = np.ones((7, 7), np.uint8)
128
+ thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
129
+ save_debug_image(thresh, "02_morph_cleaned")
130
 
131
+ # EasyOCR attempt
132
  results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
133
+ contrast_ths=0.05, adjust_contrast=1.5,
134
+ text_threshold=0.05, mag_ratio=10.0,
135
+ allowlist='0123456789.-', y_ths=0.8)
136
+
137
+ logging.info(f"EasyOCR results: {results}")
138
+ recognized_text = ""
139
+ if results:
140
+ # Sort by x-coordinate for left-to-right reading
141
+ sorted_results = sorted(results, key=lambda x: x[0][0][0])
142
+ for _, text, conf in sorted_results:
143
+ logging.info(f"EasyOCR detected: {text}, Confidence: {conf}")
144
+ if conf > conf_threshold and any(c in '0123456789.-' for c in text):
145
+ recognized_text += text
146
+ else:
147
+ logging.info("EasyOCR found no digits.")
148
+
149
+ if not recognized_text:
150
+ # Tesseract fallback
151
+ tesseract_result = tesseract_ocr(thresh)
152
+ if tesseract_result:
153
+ recognized_text = tesseract_result
154
+ logging.info(f"Using Tesseract result: {recognized_text}")
155
+
156
+ logging.info(f"Raw recognized text: {recognized_text}")
157
+ if not recognized_text:
158
+ logging.info("No text detected by EasyOCR or Tesseract.")
159
+ return "Not detected", 0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
+ # Minimal cleaning to preserve actual weight
162
+ text = recognized_text.lower().strip()
163
+ text = text.replace(",", ".").replace(";", ".").replace(":", ".").replace(" ", "")
164
+ text = text.replace("o", "0").replace("O", "0").replace("q", "0").replace("Q", "0")
165
+ text = text.replace("s", "5").replace("S", "5").replace("g", "9").replace("G", "6")
166
+ text = text.replace("l", "1").replace("I", "1").replace("|", "1")
167
+ text = text.replace("b", "8").replace("B", "8").replace("z", "2").replace("Z", "2")
168
+ text = text.replace("a", "4").replace("A", "4").replace("e", "3").replace("t", "7")
169
+ text = re.sub(r"(kgs|kg|k|lb|g|gr|pounds|lbs)\b", "", text)
170
+ text = re.sub(r"[^\d\.\-]", "", text)
171
+
172
+ if text.count('.') > 1:
173
+ parts = text.split('.')
174
+ text = parts[0] + '.' + ''.join(parts[1:])
175
+ text = text.strip('.')
176
+
177
+ if text.startswith('.'):
178
+ text = "0" + text
179
+ if text.endswith('.'):
180
+ text = text.rstrip('.')
181
+
182
+ logging.info(f"Cleaned text: {text}")
183
+ if not text or text == '.' or text == '-':
184
+ logging.warning("Cleaned text is invalid.")
185
  return "Not detected", 0.0
186
 
187
+ try:
188
+ weight = float(text)
189
+ confidence = 80.0 if recognized_text else 50.0
190
+ if weight < -1000 or weight > 2000:
191
+ logging.warning(f"Weight {weight} outside typical range, reducing confidence.")
192
+ confidence *= 0.5
193
+ if "." in text:
194
+ int_part, dec_part = text.split(".")
195
+ int_part = int_part.lstrip("0") or "0"
196
+ dec_part = dec_part.rstrip('0')
197
+ if not dec_part and int_part != "0":
198
+ text = int_part
199
+ elif not dec_part and int_part == "0":
200
+ text = "0"
201
+ else:
202
+ text = f"{int_part}.{dec_part}"
203
  else:
204
+ text = text.lstrip('0') or "0"
205
+ logging.info(f"Final detected weight: {text}, Confidence: {confidence}%")
206
+ return text, confidence
207
+ except ValueError:
208
+ logging.warning(f"Could not convert '{text}' to float.")
209
+ return "Not detected", 0.0
210
 
211
  except Exception as e:
212
  logging.error(f"Weight extraction failed unexpectedly: {str(e)}")