Sanjayraju30 committed on
Commit
b613b80
·
verified ·
1 Parent(s): 373b0d2

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +76 -4
ocr_engine.py CHANGED
@@ -93,7 +93,7 @@ def detect_roi(img):
93
  logging.info("No suitable ROI found, attempting fallback criteria.")
94
  # Fallback with relaxed criteria
95
  valid_contours = [c for c in contours if 500 < cv2.contourArea(c) < (img_area * 0.95) and
96
- 0.8 <= cv2.boundingRect(c)[2]/cv2.boundingRect(c)[3] <= 12.0]
97
  if valid_contours:
98
  contour = max(valid_contours, key=cv2.contourArea)
99
  x, y, w, h = cv2.boundingRect(contour)
@@ -254,11 +254,11 @@ def extract_weight_from_image(pil_img):
254
 
255
  brightness = estimate_brightness(img)
256
  conf_threshold = 0.7 if brightness > 150 else (0.6 if brightness > 80 else 0.4)
257
- if roi_bbox := detect_roi(img)[1]:
 
258
  roi_area = roi_bbox[2] * roi_bbox[3]
259
  conf_threshold *= 1.2 if roi_area > (img.shape[0] * img.shape[1] * 0.5) else 1.0
260
 
261
- roi_img, roi_bbox = detect_roi(img)
262
  custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
263
  if custom_result:
264
  try:
@@ -285,4 +285,76 @@ def extract_weight_from_image(pil_img):
285
  results = easyocr_reader.readtext(final_roi, detail=1, paragraph=False,
286
  contrast_ths=0.4, adjust_contrast=1.2,
287
  text_threshold=0.5, mag_ratio=4.0,
288
- allowlist='0123456789. kglb', batch_size=batch Horrible error: invalid syntax
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  logging.info("No suitable ROI found, attempting fallback criteria.")
94
  # Fallback with relaxed criteria
95
  valid_contours = [c for c in contours if 500 < cv2.contourArea(c) < (img_area * 0.95) and
96
+ 0.8 <= cv2.boundingRect(c)[2]/cv2.boundingRect(c)[3] <= 12.0]
97
  if valid_contours:
98
  contour = max(valid_contours, key=cv2.contourArea)
99
  x, y, w, h = cv2.boundingRect(contour)
 
254
 
255
  brightness = estimate_brightness(img)
256
  conf_threshold = 0.7 if brightness > 150 else (0.6 if brightness > 80 else 0.4)
257
+ roi_img, roi_bbox = detect_roi(img)
258
+ if roi_bbox:
259
  roi_area = roi_bbox[2] * roi_bbox[3]
260
  conf_threshold *= 1.2 if roi_area > (img.shape[0] * img.shape[1] * 0.5) else 1.0
261
 
 
262
  custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
263
  if custom_result:
264
  try:
 
285
  results = easyocr_reader.readtext(final_roi, detail=1, paragraph=False,
286
  contrast_ths=0.4, adjust_contrast=1.2,
287
  text_threshold=0.5, mag_ratio=4.0,
288
+ allowlist='0123456789. kglb', batch_size=batch_size, y_ths=0.2)
289
+
290
+ best_weight = None
291
+ best_conf = 0.0
292
+ best_score = 0.0
293
+ unit = None
294
+ for (bbox, text, conf) in results:
295
+ if 'kg' in text.lower():
296
+ unit = 'kg'
297
+ continue
298
+ elif 'g' in text.lower():
299
+ unit = 'g'
300
+ continue
301
+ elif 'lb' in text.lower():
302
+ unit = 'lb'
303
+ continue
304
+ text = re.sub(r"[^\d\.]", "", text)
305
+ if text.count('.') > 1:
306
+ text = text.replace('.', '', text.count('.') - 1)
307
+ text = text.strip('.')
308
+ if re.fullmatch(r"^\d*\.?\d*$", text):
309
+ try:
310
+ weight = float(text)
311
+ if unit == 'g':
312
+ weight /= 1000 # Convert grams to kilograms
313
+ elif unit == 'lb':
314
+ weight *= 0.453592 # Convert pounds to kilograms
315
+ range_score = 1.5 if 0.01 <= weight <= 500 else 0.8
316
+ digit_count = len(text.replace('.', ''))
317
+ digit_score = 1.3 if 2 <= digit_count <= 6 else 0.9
318
+ score = conf * range_score * digit_score
319
+ if roi_bbox:
320
+ (x_roi, y_roi, w_roi, h_roi) = roi_bbox
321
+ roi_area = w_roi * h_roi
322
+ x_min, y_min = int(min(b[0] for b in bbox)), int(min(b[1] for b in bbox))
323
+ x_max, y_max = int(max(b[0] for b in bbox)), int(max(b[1] for b in bbox))
324
+ bbox_area = (x_max - x_min) * (y_max - y_min)
325
+ if roi_area > 0 and bbox_area / roi_area < 0.05:
326
+ score *= 0.6
327
+ if score > best_score and conf > conf_threshold:
328
+ best_weight = text
329
+ best_conf = conf
330
+ best_score = score
331
+ logging.info(f"Candidate EasyOCR weight: '{text}', Unit: {unit or 'none'}, Conf: {conf}, Score: {score}")
332
+ except ValueError:
333
+ logging.warning(f"Could not convert '{text}' to float during EasyOCR fallback.")
334
+
335
+ if not best_weight:
336
+ logging.info("No valid weight detected after all attempts.")
337
+ return "Not detected", 0.0
338
+
339
+ # Format the weight
340
+ if "." in best_weight:
341
+ int_part, dec_part = best_weight.split(".")
342
+ int_part = int_part.lstrip("0") or "0"
343
+ dec_part = dec_part.rstrip('0')
344
+ best_weight = f"{int_part}.{dec_part}" if dec_part else int_part
345
+ else:
346
+ best_weight = best_weight.lstrip('0') or "0"
347
+
348
+ try:
349
+ final_weight = float(best_weight)
350
+ if final_weight < 0.01 or final_weight > 500:
351
+ best_conf *= 0.7
352
+ except ValueError:
353
+ pass
354
+
355
+ logging.info(f"Final detected weight: {best_weight}, Unit: {unit or 'none'}, Confidence: {round(best_conf * 100, 2)}%")
356
+ return best_weight, round(best_conf * 100, 2)
357
+
358
+ except Exception as e:
359
+ logging.error(f"Weight extraction failed unexpectedly: {str(e)}")
360
+ return "Not detected", 0.0