Sanjayraju30 committed on
Commit
d23e846
·
verified ·
1 Parent(s): b1ff045

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +79 -111
ocr_engine.py CHANGED
@@ -32,25 +32,25 @@ def estimate_brightness(img):
32
  return np.mean(gray)
33
 
34
  def preprocess_image(img):
35
- """Preprocess image with aggressive contrast and noise handling."""
36
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
37
  brightness = estimate_brightness(img)
38
- # Maximum CLAHE for extreme contrast
39
- clahe_clip = 10.0 if brightness < 80 else 6.0
40
- clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(6, 6))
41
  enhanced = clahe.apply(gray)
42
  save_debug_image(enhanced, "01_preprocess_clahe")
43
- # Edge-preserving blur
44
- blurred = cv2.bilateralFilter(enhanced, 5, 75, 75)
45
  save_debug_image(blurred, "02_preprocess_blur")
46
- # Adaptive thresholding with small blocks
47
- block_size = max(5, min(15, int(img.shape[0] / 30) * 2 + 1))
48
  thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
49
- cv2.THRESH_BINARY_INV, block_size, 3)
50
- # Morphological operations for digit segmentation
51
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
52
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
53
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=5)
54
  save_debug_image(thresh, "03_preprocess_morph")
55
  return thresh, enhanced
56
 
@@ -58,12 +58,12 @@ def correct_rotation(img):
58
  """Correct image rotation using edge detection."""
59
  try:
60
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
61
- edges = cv2.Canny(gray, 20, 80, apertureSize=3)
62
- lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=30, minLineLength=15, maxLineGap=5)
63
  if lines is not None:
64
  angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
65
  angle = np.median(angles)
66
- if abs(angle) > 0.3:
67
  h, w = img.shape[:2]
68
  center = (w // 2, h // 2)
69
  M = cv2.getRotationMatrix2D(center, angle, 1.0)
@@ -76,20 +76,20 @@ def correct_rotation(img):
76
  return img
77
 
78
  def detect_roi(img):
79
- """Detect region of interest with flexible contour filtering."""
80
  try:
81
  save_debug_image(img, "04_original")
82
  thresh, enhanced = preprocess_image(img)
83
  brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
84
- block_sizes = [max(5, min(15, int(img.shape[0] / s) * 2 + 1)) for s in [6, 10, 15]]
85
  valid_contours = []
86
  img_area = img.shape[0] * img.shape[1]
87
 
88
  for block_size in block_sizes:
89
  temp_thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
90
- cv2.THRESH_BINARY_INV, block_size, 3)
91
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
92
- temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=5)
93
  save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
94
  contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
95
 
@@ -98,15 +98,15 @@ def detect_roi(img):
98
  x, y, w, h = cv2.boundingRect(c)
99
  roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
100
  aspect_ratio = w / h
101
- if (200 < area < (img_area * 0.7) and
102
- 0.2 <= aspect_ratio <= 10.0 and w > 50 and h > 20 and roi_brightness > 40):
103
  valid_contours.append((c, area * roi_brightness))
104
  logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
105
 
106
  if valid_contours:
107
  contour, _ = max(valid_contours, key=lambda x: x[1])
108
  x, y, w, h = cv2.boundingRect(contour)
109
- padding = max(15, min(40, int(min(w, h) * 0.3)))
110
  x, y = max(0, x - padding), max(0, y - padding)
111
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
112
  roi_img = img[y:y+h, x:x+w]
@@ -122,98 +122,66 @@ def detect_roi(img):
122
  save_debug_image(img, "06_roi_error_fallback")
123
  return img, None
124
 
125
- def detect_digit_template(digit_img, brightness):
126
- """Digit recognition using template matching with predefined patterns."""
127
  try:
128
  h, w = digit_img.shape
129
- if h < 10 or w < 5:
130
- logging.debug("Digit image too small for template matching.")
131
  return None
132
 
133
- # Predefined digit templates (simplified binary patterns)
134
- digit_templates = {
135
- '0': np.array([[1, 1, 1, 1, 1],
136
- [1, 0, 0, 0, 1],
137
- [1, 0, 0, 0, 1],
138
- [1, 0, 0, 0, 1],
139
- [1, 1, 1, 1, 1]]),
140
- '1': np.array([[0, 0, 1, 0, 0],
141
- [0, 0, 1, 0, 0],
142
- [0, 0, 1, 0, 0],
143
- [0, 0, 1, 0, 0],
144
- [0, 0, 1, 0, 0]]),
145
- '2': np.array([[1, 1, 1, 1, 1],
146
- [0, 0, 0, 0, 1],
147
- [1, 1, 1, 1, 1],
148
- [1, 0, 0, 0, 0],
149
- [1, 1, 1, 1, 1]]),
150
- '3': np.array([[1, 1, 1, 1, 1],
151
- [0, 0, 0, 0, 1],
152
- [1, 1, 1, 1, 1],
153
- [0, 0, 0, 0, 1],
154
- [1, 1, 1, 1, 1]]),
155
- '4': np.array([[1, 0, 0, 0, 1],
156
- [1, 0, 0, 0, 1],
157
- [1, 1, 1, 1, 1],
158
- [0, 0, 0, 0, 1],
159
- [0, 0, 0, 0, 1]]),
160
- '5': np.array([[1, 1, 1, 1, 1],
161
- [1, 0, 0, 0, 0],
162
- [1, 1, 1, 1, 1],
163
- [0, 0, 0, 0, 1],
164
- [1, 1, 1, 1, 1]]),
165
- '6': np.array([[1, 1, 1, 1, 1],
166
- [1, 0, 0, 0, 0],
167
- [1, 1, 1, 1, 1],
168
- [1, 0, 0, 0, 1],
169
- [1, 1, 1, 1, 1]]),
170
- '7': np.array([[1, 1, 1, 1, 1],
171
- [0, 0, 0, 0, 1],
172
- [0, 0, 0, 0, 1],
173
- [0, 0, 0, 0, 1],
174
- [0, 0, 0, 0, 1]]),
175
- '8': np.array([[1, 1, 1, 1, 1],
176
- [1, 0, 0, 0, 1],
177
- [1, 1, 1, 1, 1],
178
- [1, 0, 0, 0, 1],
179
- [1, 1, 1, 1, 1]]),
180
- '9': np.array([[1, 1, 1, 1, 1],
181
- [1, 0, 0, 0, 1],
182
- [1, 1, 1, 1, 1],
183
- [0, 0, 0, 0, 1],
184
- [1, 1, 1, 1, 1]]),
185
- '.': np.array([[0, 0, 0],
186
- [0, 1, 0],
187
- [0, 0, 0]])
188
- }
189
 
190
- # Resize digit_img to match template size (5x5 for digits, 3x3 for decimal)
191
- digit_img_resized = cv2.resize(digit_img, (5, 5), interpolation=cv2.INTER_NEAREST)
192
- best_match, best_score = None, -1
193
- for digit, template in digit_templates.items():
194
- if digit == '.':
195
- digit_img_resized = cv2.resize(digit_img, (3, 3), interpolation=cv2.INTER_NEAREST)
196
- result = cv2.matchTemplate(digit_img_resized, template, cv2.TM_CCOEFF_NORMED)
197
- _, max_val, _, _ = cv2.minMaxLoc(result)
198
- if max_val > 0.7 and max_val > best_score:
199
- best_score = max_val
200
- best_match = digit
201
- logging.debug(f"Template match: {best_match}, Score: {best_score:.2f}")
202
- return best_match if best_score > 0.7 else None
 
 
 
 
 
 
 
 
 
 
 
 
 
203
  except Exception as e:
204
- logging.error(f"Template digit detection failed: {str(e)}")
205
  return None
206
 
207
  def perform_ocr(img, roi_bbox):
208
- """Perform OCR with Tesseract and template-based fallback."""
209
  try:
210
  thresh, enhanced = preprocess_image(img)
211
  brightness = estimate_brightness(img)
212
  pil_img = Image.fromarray(enhanced)
213
  save_debug_image(pil_img, "07_ocr_input")
214
 
215
- # Tesseract with flexible numeric config
216
- custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'
217
  text = pytesseract.image_to_string(pil_img, config=custom_config)
218
  logging.info(f"Tesseract raw output: {text}")
219
 
@@ -224,17 +192,17 @@ def perform_ocr(img, roi_bbox):
224
  text = text.strip('.')
225
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
226
  text = text.lstrip('0') or '0'
227
- confidence = 97.0 if len(text.replace('.', '')) >= 3 else 94.0
228
  logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
229
  return text, confidence
230
 
231
- # Fallback to template-based detection
232
- logging.info("Tesseract failed, using template-based detection.")
233
  contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
234
  digits_info = []
235
  for c in contours:
236
  x, y, w, h = cv2.boundingRect(c)
237
- if w > 8 and h > 10 and 0.1 <= w/h <= 2.0:
238
  digits_info.append((x, x+w, y, y+h))
239
 
240
  if digits_info:
@@ -248,10 +216,10 @@ def perform_ocr(img, roi_bbox):
248
  continue
249
  digit_crop = thresh[y_min:y_max, x_min:x_max]
250
  save_debug_image(digit_crop, f"08_digit_crop_{idx}")
251
- digit = detect_digit_template(digit_crop, brightness)
252
  if digit:
253
  recognized_text += digit
254
- elif x_min - prev_x_max < 8 and prev_x_max != -float('inf'):
255
  recognized_text += '.'
256
  prev_x_max = x_max
257
 
@@ -261,8 +229,8 @@ def perform_ocr(img, roi_bbox):
261
  text = text.strip('.')
262
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
263
  text = text.lstrip('0') or '0'
264
- confidence = 92.0 if len(text.replace('.', '')) >= 3 else 89.0
265
- logging.info(f"Validated template text: {text}, Confidence: {confidence:.2f}%")
266
  return text, confidence
267
 
268
  logging.info("No valid digits detected.")
@@ -272,18 +240,18 @@ def perform_ocr(img, roi_bbox):
272
  return None, 0.0
273
 
274
  def extract_weight_from_image(pil_img):
275
- """Extract weight from any digital scale image."""
276
  try:
277
  img = np.array(pil_img)
278
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
279
  save_debug_image(img, "00_input_image")
280
  img = correct_rotation(img)
281
  brightness = estimate_brightness(img)
282
- conf_threshold = 0.8 if brightness > 100 else 0.6
283
 
284
  roi_img, roi_bbox = detect_roi(img)
285
  if roi_bbox:
286
- conf_threshold *= 1.05 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.2) else 1.0
287
 
288
  result, confidence = perform_ocr(roi_img, roi_bbox)
289
  if result and confidence >= conf_threshold * 100:
@@ -298,7 +266,7 @@ def extract_weight_from_image(pil_img):
298
 
299
  logging.info("Primary OCR failed, using full image fallback.")
300
  result, confidence = perform_ocr(img, None)
301
- if result and confidence >= conf_threshold * 0.85 * 100:
302
  try:
303
  weight = float(result)
304
  if 0.01 <= weight <= 1000:
 
32
  return np.mean(gray)
33
 
34
  def preprocess_image(img):
35
+ """Preprocess image for OCR with aggressive contrast and noise reduction."""
36
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
37
  brightness = estimate_brightness(img)
38
+ # Aggressive CLAHE
39
+ clahe_clip = 6.0 if brightness < 80 else 4.0
40
+ clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
41
  enhanced = clahe.apply(gray)
42
  save_debug_image(enhanced, "01_preprocess_clahe")
43
+ # Minimal blur to preserve edges
44
+ blurred = cv2.GaussianBlur(enhanced, (3, 3), 0)
45
  save_debug_image(blurred, "02_preprocess_blur")
46
+ # Multi-scale thresholding
47
+ block_size = max(9, min(25, int(img.shape[0] / 20) * 2 + 1))
48
  thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
49
+ cv2.THRESH_BINARY_INV, block_size, 7)
50
+ # Morphological operations
51
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
52
+ thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
53
+ thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
54
  save_debug_image(thresh, "03_preprocess_morph")
55
  return thresh, enhanced
56
 
 
58
  """Correct image rotation using edge detection."""
59
  try:
60
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
61
+ edges = cv2.Canny(gray, 30, 100, apertureSize=3)
62
+ lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=40, minLineLength=20, maxLineGap=10)
63
  if lines is not None:
64
  angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
65
  angle = np.median(angles)
66
+ if abs(angle) > 0.5:
67
  h, w = img.shape[:2]
68
  center = (w // 2, h // 2)
69
  M = cv2.getRotationMatrix2D(center, angle, 1.0)
 
76
  return img
77
 
78
  def detect_roi(img):
79
+ """Detect region of interest with aggressive contour filtering."""
80
  try:
81
  save_debug_image(img, "04_original")
82
  thresh, enhanced = preprocess_image(img)
83
  brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
84
+ block_sizes = [max(9, min(25, int(img.shape[0] / s) * 2 + 1)) for s in [10, 15, 20]]
85
  valid_contours = []
86
  img_area = img.shape[0] * img.shape[1]
87
 
88
  for block_size in block_sizes:
89
  temp_thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
90
+ cv2.THRESH_BINARY_INV, block_size, 7)
91
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
92
+ temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
93
  save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
94
  contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
95
 
 
98
  x, y, w, h = cv2.boundingRect(c)
99
  roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
100
  aspect_ratio = w / h
101
+ if (500 < area < (img_area * 0.5) and
102
+ 0.5 <= aspect_ratio <= 6.0 and w > 80 and h > 40 and roi_brightness > 60):
103
  valid_contours.append((c, area * roi_brightness))
104
  logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
105
 
106
  if valid_contours:
107
  contour, _ = max(valid_contours, key=lambda x: x[1])
108
  x, y, w, h = cv2.boundingRect(contour)
109
+ padding = max(25, min(70, int(min(w, h) * 0.5)))
110
  x, y = max(0, x - padding), max(0, y - padding)
111
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
112
  roi_img = img[y:y+h, x:x+w]
 
122
  save_debug_image(img, "06_roi_error_fallback")
123
  return img, None
124
 
125
+ def detect_digit_contour(digit_img, brightness):
126
+ """Simplified contour-based digit recognition."""
127
  try:
128
  h, w = digit_img.shape
129
+ if h < 20 or w < 10:
130
+ logging.debug("Digit image too small for contour detection.")
131
  return None
132
 
133
+ # Normalize image
134
+ pixel_count = np.sum(digit_img == 255)
135
+ total_pixels = digit_img.size
136
+ density = pixel_count / total_pixels
137
+ if density < 0.1 or density > 0.8:
138
+ return None
139
+
140
+ # Contour analysis
141
+ contours, _ = cv2.findContours(digit_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
142
+ if not contours:
143
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
+ contour = max(contours, key=cv2.contourArea)
146
+ x, y, cw, ch = cv2.boundingRect(contour)
147
+ if cw < 5 or ch < 10:
148
+ return None
149
+
150
+ aspect = cw / ch
151
+ area_ratio = cv2.contourArea(contour) / (cw * ch)
152
+
153
+ # Simplified digit patterns
154
+ if aspect > 0.2 and aspect < 0.4 and area_ratio > 0.5:
155
+ return '1'
156
+ elif aspect > 0.5 and area_ratio > 0.6:
157
+ if density > 0.5:
158
+ return '8'
159
+ elif density > 0.3:
160
+ return '0'
161
+ elif aspect > 0.4 and area_ratio > 0.5:
162
+ if density > 0.4:
163
+ return '3'
164
+ elif density > 0.3:
165
+ return '2'
166
+ elif aspect > 0.3 and area_ratio > 0.4:
167
+ return '5' if density > 0.3 else '7'
168
+ elif aspect > 0.2 and area_ratio > 0.3:
169
+ return '4' if density > 0.2 else '9'
170
+ return None
171
  except Exception as e:
172
+ logging.error(f"Contour digit detection failed: {str(e)}")
173
  return None
174
 
175
  def perform_ocr(img, roi_bbox):
176
+ """Perform OCR with Tesseract and contour-based fallback."""
177
  try:
178
  thresh, enhanced = preprocess_image(img)
179
  brightness = estimate_brightness(img)
180
  pil_img = Image.fromarray(enhanced)
181
  save_debug_image(pil_img, "07_ocr_input")
182
 
183
+ # Tesseract with aggressive numeric config
184
+ custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
185
  text = pytesseract.image_to_string(pil_img, config=custom_config)
186
  logging.info(f"Tesseract raw output: {text}")
187
 
 
192
  text = text.strip('.')
193
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
194
  text = text.lstrip('0') or '0'
195
+ confidence = 98.0 if len(text.replace('.', '')) >= 3 else 95.0
196
  logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
197
  return text, confidence
198
 
199
+ # Fallback to contour-based detection
200
+ logging.info("Tesseract failed, using contour-based detection.")
201
  contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
202
  digits_info = []
203
  for c in contours:
204
  x, y, w, h = cv2.boundingRect(c)
205
+ if w > 15 and h > 20 and 0.2 <= w/h <= 1.2:
206
  digits_info.append((x, x+w, y, y+h))
207
 
208
  if digits_info:
 
216
  continue
217
  digit_crop = thresh[y_min:y_max, x_min:x_max]
218
  save_debug_image(digit_crop, f"08_digit_crop_{idx}")
219
+ digit = detect_digit_contour(digit_crop, brightness)
220
  if digit:
221
  recognized_text += digit
222
+ elif x_min - prev_x_max < 15 and prev_x_max != -float('inf'):
223
  recognized_text += '.'
224
  prev_x_max = x_max
225
 
 
229
  text = text.strip('.')
230
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
231
  text = text.lstrip('0') or '0'
232
+ confidence = 92.0 if len(text.replace('.', '')) >= 3 else 90.0
233
+ logging.info(f"Validated contour text: {text}, Confidence: {confidence:.2f}%")
234
  return text, confidence
235
 
236
  logging.info("No valid digits detected.")
 
240
  return None, 0.0
241
 
242
  def extract_weight_from_image(pil_img):
243
+ """Extract weight from a digital scale image."""
244
  try:
245
  img = np.array(pil_img)
246
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
247
  save_debug_image(img, "00_input_image")
248
  img = correct_rotation(img)
249
  brightness = estimate_brightness(img)
250
+ conf_threshold = 0.9 if brightness > 100 else 0.7
251
 
252
  roi_img, roi_bbox = detect_roi(img)
253
  if roi_bbox:
254
+ conf_threshold *= 1.15 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.3) else 1.0
255
 
256
  result, confidence = perform_ocr(roi_img, roi_bbox)
257
  if result and confidence >= conf_threshold * 100:
 
266
 
267
  logging.info("Primary OCR failed, using full image fallback.")
268
  result, confidence = perform_ocr(img, None)
269
+ if result and confidence >= conf_threshold * 0.95 * 100:
270
  try:
271
  weight = float(result)
272
  if 0.01 <= weight <= 1000: