Sanjayraju30 committed on
Commit
7c31f9a
·
verified ·
1 Parent(s): 0f29b7c

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +55 -34
ocr_engine.py CHANGED
@@ -19,40 +19,58 @@ def detect_roi(img):
19
  """Detect and crop the region of interest (likely the digital display)"""
20
  try:
21
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
22
- # Threshold to isolate bright areas (like illuminated displays)
23
  brightness = estimate_brightness(img)
24
- thresh_value = 200 if brightness > 100 else 150 # Adjust based on brightness
25
  _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
26
- # Dilate to connect digits
27
- kernel = np.ones((7, 7), np.uint8)
28
- dilated = cv2.dilate(thresh, kernel, iterations=2)
29
  # Find contours
30
  contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
31
  if contours:
32
- # Get the largest contour with reasonable size
33
  valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
34
  if valid_contours:
35
- largest_contour = max(valid_contours, key=cv2.contourArea)
36
- x, y, w, h = cv2.boundingRect(largest_contour)
37
- # Add more padding and ensure bounds
38
- x, y = max(0, x-30), max(0, y-30)
39
- w, h = min(w+60, img.shape[1]-x), min(h+60, img.shape[0]-y)
40
- if w > 50 and h > 30:
41
- return img[y:y+h, x:x+w]
42
  return img
43
  except Exception as e:
44
  logging.error(f"ROI detection failed: {str(e)}")
45
  return img
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def enhance_image(img, mode="standard"):
48
  """Enhance image with different modes for multi-scale processing"""
49
  try:
50
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
51
 
52
  if mode == "seven_segment":
53
- # Gentle preprocessing for seven-segment displays
54
- denoised = cv2.GaussianBlur(gray, (5, 5), 0)
 
 
 
 
55
  _, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
 
56
  elif mode == "high_contrast":
57
  denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
58
  clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
@@ -66,7 +84,7 @@ def enhance_image(img, mode="standard"):
66
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
67
  thresh = clahe.apply(denoised)
68
 
69
- if mode != "seven_segment":
70
  thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
71
  cv2.THRESH_BINARY, 11, 2)
72
 
@@ -74,21 +92,22 @@ def enhance_image(img, mode="standard"):
74
  kernel = np.ones((3, 3), np.uint8)
75
  morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
76
 
77
- # Reduced sharpening for seven-segment displays
78
- brightness = estimate_brightness(img)
79
- sharpen_strength = 3 if mode == "seven_segment" or brightness > 100 else 5
80
- sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
81
- sharpened = cv2.filter2D(morphed, -1, sharpen_kernel)
 
82
 
83
  # Dynamic resizing
84
- h, w = sharpened.shape
85
  target_size = 800
86
  scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
87
  if scale_factor != 1.0:
88
- sharpened = cv2.resize(sharpened, None, fx=scale_factor, fy=scale_factor,
89
- interpolation=cv2.INTER_CUBIC if scale_factor > 1 else cv2.INTER_AREA)
90
 
91
- return sharpened
92
  except Exception as e:
93
  logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
94
  return img
@@ -100,18 +119,19 @@ def extract_weight_from_image(pil_img):
100
 
101
  # Estimate brightness for adaptive thresholding
102
  brightness = estimate_brightness(img)
103
- conf_threshold = 0.5 if brightness > 100 else 0.4 # Stricter for bright displays
104
 
105
  # Detect ROI
106
  roi_img = detect_roi(img)
107
 
108
  # Process multiple image versions
109
  images_to_process = [
110
- ("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.3, 'allowlist': '0123456789.'}),
111
- ("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1}),
112
- ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1}),
113
- ("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1}),
114
- ("original", roi_img, {'contrast_ths': 0.3, 'allowlist': '0123456789.'})
 
115
  ]
116
 
117
  best_weight = None
@@ -123,6 +143,8 @@ def extract_weight_from_image(pil_img):
123
  results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
124
 
125
  for (bbox, text, conf) in results:
 
 
126
  original_text = text
127
  text = text.lower().strip()
128
 
@@ -135,7 +157,6 @@ def extract_weight_from_image(pil_img):
135
  text = text.replace("b", "8").replace("B", "8")
136
  text = text.replace("z", "2").replace("Z", "2")
137
  text = text.replace("q", "9").replace("Q", "9")
138
- text = text.replace("6", "2").replace("9", "2") # Specific correction for seven-segment
139
  text = text.replace("kgs", "").replace("kg", "").replace("k", "")
140
  text = re.sub(r"[^\d\.]", "", text)
141
 
@@ -145,8 +166,8 @@ def extract_weight_from_image(pil_img):
145
  weight = float(text)
146
  # Score based on realistic weight range (0.1–500 kg)
147
  range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
148
- # Prefer two-digit weights for scales
149
- digit_score = 1.1 if 10 <= weight < 100 else 1.0
150
  score = conf * range_score * digit_score
151
  if score > best_score and conf > conf_threshold:
152
  best_weight = text
 
19
  """Detect and crop the region of interest (likely the digital display)"""
20
  try:
21
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
22
+ # Stricter threshold for bright areas
23
  brightness = estimate_brightness(img)
24
+ thresh_value = 220 if brightness > 100 else 180
25
  _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
26
+ # Morphological operations to connect digits
27
+ kernel = np.ones((9, 9), np.uint8)
28
+ dilated = cv2.dilate(thresh, kernel, iterations=3)
29
  # Find contours
30
  contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
31
  if contours:
32
+ # Filter contours by size and aspect ratio (typical for displays)
33
  valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
34
  if valid_contours:
35
+ for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
36
+ x, y, w, h = cv2.boundingRect(contour)
37
+ aspect_ratio = w / h
38
+ if 1.5 <= aspect_ratio <= 4.0 and w > 50 and h > 30: # Typical display aspect ratio
39
+ x, y = max(0, x-40), max(0, y-40)
40
+ w, h = min(w+80, img.shape[1]-x), min(h+80, img.shape[0]-y)
41
+ return img[y:y+h, x:x+w]
42
  return img
43
  except Exception as e:
44
  logging.error(f"ROI detection failed: {str(e)}")
45
  return img
46
 
47
def correct_seven_segment(text, bbox):
    """Correct common seven-segment "6" vs "2" misreads using box shape.

    Args:
        text: OCR text candidate that may contain a misread "6".
        bbox: Four corner points of the detected text region in EasyOCR
            order: top-left, top-right, bottom-right, bottom-left.

    Returns:
        str: ``text`` with "6" replaced by "2" when the bounding box is
        wide relative to its height; otherwise unchanged.
    """
    if "6" in text:
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
        # Width along the top edge (TL -> TR); height along the right
        # edge (TR -> BR).  BUGFIX: the previous abs(y2 - y1) measured
        # the vertical span of the *top* edge, which is ~0 for an
        # upright box, so aspect_ratio degenerated to 1.0 and every "6"
        # was unconditionally rewritten to "2".
        width = abs(x2 - x1)
        height = abs(y3 - y2)
        aspect_ratio = width / height if height > 0 else 1.0
        # Heuristic: a "2" tends to occupy a wider box on seven-segment
        # displays.  NOTE(review): the 0.5 threshold is empirical —
        # confirm against real display captures.
        if aspect_ratio > 0.5:
            text = text.replace("6", "2")
    return text
59
+
60
  def enhance_image(img, mode="standard"):
61
  """Enhance image with different modes for multi-scale processing"""
62
  try:
63
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
64
 
65
  if mode == "seven_segment":
66
+ # Minimal preprocessing for seven-segment displays
67
+ _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
68
+ return thresh
69
+ elif mode == "minimal":
70
+ # Very light preprocessing
71
+ denoised = cv2.GaussianBlur(gray, (3, 3), 0)
72
  _, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
73
+ return thresh
74
  elif mode == "high_contrast":
75
  denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
76
  clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
 
84
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
85
  thresh = clahe.apply(denoised)
86
 
87
+ if mode not in ["seven_segment", "minimal"]:
88
  thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
89
  cv2.THRESH_BINARY, 11, 2)
90
 
 
92
  kernel = np.ones((3, 3), np.uint8)
93
  morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
94
 
95
+ # Skip sharpening for seven-segment and minimal modes
96
+ if mode not in ["seven_segment", "minimal"]:
97
+ brightness = estimate_brightness(img)
98
+ sharpen_strength = 3 if brightness > 100 else 5
99
+ sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
100
+ morphed = cv2.filter2D(morphed, -1, sharpen_kernel)
101
 
102
  # Dynamic resizing
103
+ h, w = morphed.shape
104
  target_size = 800
105
  scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
106
  if scale_factor != 1.0:
107
+ morphed = cv2.resize(morphed, None, fx=scale_factor, fy=scale_factor,
108
+ interpolation=cv2.INTER_CUBIC if scale_factor > 1 else cv2.INTER_AREA)
109
 
110
+ return morphed
111
  except Exception as e:
112
  logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
113
  return img
 
119
 
120
  # Estimate brightness for adaptive thresholding
121
  brightness = estimate_brightness(img)
122
+ conf_threshold = 0.7 if brightness > 100 else 0.5 # Stricter for bright displays
123
 
124
  # Detect ROI
125
  roi_img = detect_roi(img)
126
 
127
  # Process multiple image versions
128
  images_to_process = [
129
+ ("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'}),
130
+ ("minimal", enhance_image(roi_img, mode="minimal"), {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'}),
131
+ ("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
132
+ ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
133
+ ("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
134
+ ("original", roi_img, {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'})
135
  ]
136
 
137
  best_weight = None
 
143
  results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
144
 
145
  for (bbox, text, conf) in results:
146
+ # Apply seven-segment correction
147
+ text = correct_seven_segment(text, bbox)
148
  original_text = text
149
  text = text.lower().strip()
150
 
 
157
  text = text.replace("b", "8").replace("B", "8")
158
  text = text.replace("z", "2").replace("Z", "2")
159
  text = text.replace("q", "9").replace("Q", "9")
 
160
  text = text.replace("kgs", "").replace("kg", "").replace("k", "")
161
  text = re.sub(r"[^\d\.]", "", text)
162
 
 
166
  weight = float(text)
167
  # Score based on realistic weight range (0.1–500 kg)
168
  range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
169
+ # Strongly prefer two-digit weights for scales
170
+ digit_score = 1.5 if 10 <= weight < 100 else 1.0
171
  score = conf * range_score * digit_score
172
  if score > best_score and conf > conf_threshold:
173
  best_weight = text