Sanjayraju30 committed on
Commit
b7eaba3
·
verified ·
1 Parent(s): 770a6ee

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +111 -79
ocr_engine.py CHANGED
@@ -32,25 +32,25 @@ def estimate_brightness(img):
32
  return np.mean(gray)
33
 
34
  def preprocess_image(img):
35
- """Preprocess image for OCR with aggressive contrast and noise reduction."""
36
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
37
  brightness = estimate_brightness(img)
38
- # Aggressive CLAHE
39
- clahe_clip = 6.0 if brightness < 80 else 4.0
40
- clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
41
  enhanced = clahe.apply(gray)
42
  save_debug_image(enhanced, "01_preprocess_clahe")
43
- # Minimal blur to preserve edges
44
- blurred = cv2.GaussianBlur(enhanced, (3, 3), 0)
45
  save_debug_image(blurred, "02_preprocess_blur")
46
- # Multi-scale thresholding
47
- block_size = max(9, min(25, int(img.shape[0] / 20) * 2 + 1))
48
  thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
49
- cv2.THRESH_BINARY_INV, block_size, 7)
50
- # Morphological operations
51
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
52
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)
53
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
54
  save_debug_image(thresh, "03_preprocess_morph")
55
  return thresh, enhanced
56
 
@@ -58,12 +58,12 @@ def correct_rotation(img):
58
  """Correct image rotation using edge detection."""
59
  try:
60
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
61
- edges = cv2.Canny(gray, 30, 100, apertureSize=3)
62
- lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=40, minLineLength=20, maxLineGap=10)
63
  if lines is not None:
64
  angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
65
  angle = np.median(angles)
66
- if abs(angle) > 0.5:
67
  h, w = img.shape[:2]
68
  center = (w // 2, h // 2)
69
  M = cv2.getRotationMatrix2D(center, angle, 1.0)
@@ -76,20 +76,20 @@ def correct_rotation(img):
76
  return img
77
 
78
  def detect_roi(img):
79
- """Detect region of interest with aggressive contour filtering."""
80
  try:
81
  save_debug_image(img, "04_original")
82
  thresh, enhanced = preprocess_image(img)
83
  brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
84
- block_sizes = [max(9, min(25, int(img.shape[0] / s) * 2 + 1)) for s in [10, 15, 20]]
85
  valid_contours = []
86
  img_area = img.shape[0] * img.shape[1]
87
 
88
  for block_size in block_sizes:
89
  temp_thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
90
- cv2.THRESH_BINARY_INV, block_size, 7)
91
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
92
- temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
93
  save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
94
  contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
95
 
@@ -98,15 +98,15 @@ def detect_roi(img):
98
  x, y, w, h = cv2.boundingRect(c)
99
  roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
100
  aspect_ratio = w / h
101
- if (500 < area < (img_area * 0.5) and
102
- 0.5 <= aspect_ratio <= 6.0 and w > 80 and h > 40 and roi_brightness > 60):
103
  valid_contours.append((c, area * roi_brightness))
104
  logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
105
 
106
  if valid_contours:
107
  contour, _ = max(valid_contours, key=lambda x: x[1])
108
  x, y, w, h = cv2.boundingRect(contour)
109
- padding = max(25, min(70, int(min(w, h) * 0.5)))
110
  x, y = max(0, x - padding), max(0, y - padding)
111
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
112
  roi_img = img[y:y+h, x:x+w]
@@ -122,66 +122,98 @@ def detect_roi(img):
122
  save_debug_image(img, "06_roi_error_fallback")
123
  return img, None
124
 
125
- def detect_digit_contour(digit_img, brightness):
126
- """Simplified contour-based digit recognition."""
127
  try:
128
  h, w = digit_img.shape
129
- if h < 20 or w < 10:
130
- logging.debug("Digit image too small for contour detection.")
131
  return None
132
 
133
- # Normalize image
134
- pixel_count = np.sum(digit_img == 255)
135
- total_pixels = digit_img.size
136
- density = pixel_count / total_pixels
137
- if density < 0.1 or density > 0.8:
138
- return None
139
-
140
- # Contour analysis
141
- contours, _ = cv2.findContours(digit_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
142
- if not contours:
143
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
- contour = max(contours, key=cv2.contourArea)
146
- x, y, cw, ch = cv2.boundingRect(contour)
147
- if cw < 5 or ch < 10:
148
- return None
149
-
150
- aspect = cw / ch
151
- area_ratio = cv2.contourArea(contour) / (cw * ch)
152
-
153
- # Simplified digit patterns
154
- if aspect > 0.2 and aspect < 0.4 and area_ratio > 0.5:
155
- return '1'
156
- elif aspect > 0.5 and area_ratio > 0.6:
157
- if density > 0.5:
158
- return '8'
159
- elif density > 0.3:
160
- return '0'
161
- elif aspect > 0.4 and area_ratio > 0.5:
162
- if density > 0.4:
163
- return '3'
164
- elif density > 0.3:
165
- return '2'
166
- elif aspect > 0.3 and area_ratio > 0.4:
167
- return '5' if density > 0.3 else '7'
168
- elif aspect > 0.2 and area_ratio > 0.3:
169
- return '4' if density > 0.2 else '9'
170
- return None
171
  except Exception as e:
172
- logging.error(f"Contour digit detection failed: {str(e)}")
173
  return None
174
 
175
  def perform_ocr(img, roi_bbox):
176
- """Perform OCR with Tesseract and contour-based fallback."""
177
  try:
178
  thresh, enhanced = preprocess_image(img)
179
  brightness = estimate_brightness(img)
180
  pil_img = Image.fromarray(enhanced)
181
  save_debug_image(pil_img, "07_ocr_input")
182
 
183
- # Tesseract with aggressive numeric config
184
- custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
185
  text = pytesseract.image_to_string(pil_img, config=custom_config)
186
  logging.info(f"Tesseract raw output: {text}")
187
 
@@ -192,17 +224,17 @@ def perform_ocr(img, roi_bbox):
192
  text = text.strip('.')
193
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
194
  text = text.lstrip('0') or '0'
195
- confidence = 98.0 if len(text.replace('.', '')) >= 3 else 95.0
196
  logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
197
  return text, confidence
198
 
199
- # Fallback to contour-based detection
200
- logging.info("Tesseract failed, using contour-based detection.")
201
  contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
202
  digits_info = []
203
  for c in contours:
204
  x, y, w, h = cv2.boundingRect(c)
205
- if w > 15 and h > 20 and 0.2 <= w/h <= 1.2:
206
  digits_info.append((x, x+w, y, y+h))
207
 
208
  if digits_info:
@@ -216,10 +248,10 @@ def perform_ocr(img, roi_bbox):
216
  continue
217
  digit_crop = thresh[y_min:y_max, x_min:x_max]
218
  save_debug_image(digit_crop, f"08_digit_crop_{idx}")
219
- digit = detect_digit_contour(digit_crop, brightness)
220
  if digit:
221
  recognized_text += digit
222
- elif x_min - prev_x_max < 15 and prev_x_max != -float('inf'):
223
  recognized_text += '.'
224
  prev_x_max = x_max
225
 
@@ -229,8 +261,8 @@ def perform_ocr(img, roi_bbox):
229
  text = text.strip('.')
230
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
231
  text = text.lstrip('0') or '0'
232
- confidence = 92.0 if len(text.replace('.', '')) >= 3 else 90.0
233
- logging.info(f"Validated contour text: {text}, Confidence: {confidence:.2f}%")
234
  return text, confidence
235
 
236
  logging.info("No valid digits detected.")
@@ -240,18 +272,18 @@ def perform_ocr(img, roi_bbox):
240
  return None, 0.0
241
 
242
  def extract_weight_from_image(pil_img):
243
- """Extract weight from a digital scale image."""
244
  try:
245
  img = np.array(pil_img)
246
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
247
  save_debug_image(img, "00_input_image")
248
  img = correct_rotation(img)
249
  brightness = estimate_brightness(img)
250
- conf_threshold = 0.9 if brightness > 100 else 0.7
251
 
252
  roi_img, roi_bbox = detect_roi(img)
253
  if roi_bbox:
254
- conf_threshold *= 1.15 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.3) else 1.0
255
 
256
  result, confidence = perform_ocr(roi_img, roi_bbox)
257
  if result and confidence >= conf_threshold * 100:
@@ -266,7 +298,7 @@ def extract_weight_from_image(pil_img):
266
 
267
  logging.info("Primary OCR failed, using full image fallback.")
268
  result, confidence = perform_ocr(img, None)
269
- if result and confidence >= conf_threshold * 0.95 * 100:
270
  try:
271
  weight = float(result)
272
  if 0.01 <= weight <= 1000:
 
32
  return np.mean(gray)
33
 
def preprocess_image(img):
    """Build the binarized and contrast-enhanced views of a BGR image.

    Pipeline: grayscale -> CLAHE -> bilateral filter -> inverted Gaussian
    adaptive threshold -> morphological open then close.  A debug image is
    saved after the CLAHE, blur and morphology stages.

    Args:
        img: Image array passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.

    Returns:
        Tuple ``(thresh, enhanced)``: the cleaned-up inverted binary image
        and the CLAHE-enhanced grayscale image.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Darker frames (mean brightness below 80) get a stronger clip limit.
    if estimate_brightness(img) < 80:
        clip_limit = 10.0
    else:
        clip_limit = 6.0
    equalizer = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(6, 6))
    enhanced = equalizer.apply(grayscale)
    save_debug_image(enhanced, "01_preprocess_clahe")

    # Bilateral filtering smooths noise while keeping edges.
    smoothed = cv2.bilateralFilter(enhanced, 5, 75, 75)
    save_debug_image(smoothed, "02_preprocess_blur")

    # The neighbourhood size scales with image height, is always odd, and
    # is clamped to the range 5..15.
    height = img.shape[0]
    odd_size = int(height / 30) * 2 + 1
    neighbourhood = max(5, min(15, odd_size))
    binary = cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        neighbourhood,
        3,
    )

    # Open once to drop specks, then close five times to join strokes.
    struct_elem = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    for operation, repeats in ((cv2.MORPH_OPEN, 1), (cv2.MORPH_CLOSE, 5)):
        binary = cv2.morphologyEx(binary, operation, struct_elem, iterations=repeats)
    save_debug_image(binary, "03_preprocess_morph")

    return binary, enhanced
56
 
 
58
  """Correct image rotation using edge detection."""
59
  try:
60
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
61
+ edges = cv2.Canny(gray, 20, 80, apertureSize=3)
62
+ lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=30, minLineLength=15, maxLineGap=5)
63
  if lines is not None:
64
  angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
65
  angle = np.median(angles)
66
+ if abs(angle) > 0.3:
67
  h, w = img.shape[:2]
68
  center = (w // 2, h // 2)
69
  M = cv2.getRotationMatrix2D(center, angle, 1.0)
 
76
  return img
77
 
78
  def detect_roi(img):
79
+ """Detect region of interest with flexible contour filtering."""
80
  try:
81
  save_debug_image(img, "04_original")
82
  thresh, enhanced = preprocess_image(img)
83
  brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
84
+ block_sizes = [max(5, min(15, int(img.shape[0] / s) * 2 + 1)) for s in [6, 10, 15]]
85
  valid_contours = []
86
  img_area = img.shape[0] * img.shape[1]
87
 
88
  for block_size in block_sizes:
89
  temp_thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
90
+ cv2.THRESH_BINARY_INV, block_size, 3)
91
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
92
+ temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=5)
93
  save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
94
  contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
95
 
 
98
  x, y, w, h = cv2.boundingRect(c)
99
  roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
100
  aspect_ratio = w / h
101
+ if (200 < area < (img_area * 0.7) and
102
+ 0.2 <= aspect_ratio <= 10.0 and w > 50 and h > 20 and roi_brightness > 40):
103
  valid_contours.append((c, area * roi_brightness))
104
  logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
105
 
106
  if valid_contours:
107
  contour, _ = max(valid_contours, key=lambda x: x[1])
108
  x, y, w, h = cv2.boundingRect(contour)
109
+ padding = max(15, min(40, int(min(w, h) * 0.3)))
110
  x, y = max(0, x - padding), max(0, y - padding)
111
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
112
  roi_img = img[y:y+h, x:x+w]
 
122
  save_debug_image(img, "06_roi_error_fallback")
123
  return img, None
124
 
# Minimum correlation a template must exceed to count as a match.
_TEMPLATE_MATCH_THRESHOLD = 0.7

# Coarse binary patterns: 5x5 grids for digits, 3x3 for the decimal point.
_DIGIT_PATTERNS = {
    '0': ("11111", "10001", "10001", "10001", "11111"),
    '1': ("00100", "00100", "00100", "00100", "00100"),
    '2': ("11111", "00001", "11111", "10000", "11111"),
    '3': ("11111", "00001", "11111", "00001", "11111"),
    '4': ("10001", "10001", "11111", "00001", "00001"),
    '5': ("11111", "10000", "11111", "00001", "11111"),
    '6': ("11111", "10000", "11111", "10001", "11111"),
    '7': ("11111", "00001", "00001", "00001", "00001"),
    '8': ("11111", "10001", "11111", "10001", "11111"),
    '9': ("11111", "10001", "11111", "00001", "11111"),
    '.': ("000", "010", "000"),
}

# Built once at import time as float arrays so they can be correlated
# directly against any numeric crop without dtype mismatches.
_DIGIT_TEMPLATES = {
    label: np.array([[int(ch) for ch in row] for row in rows], dtype=np.float64)
    for label, rows in _DIGIT_PATTERNS.items()
}


def _resize_nearest(img, rows, cols):
    """Downsample a 2-D array to (rows, cols) by nearest-neighbour sampling."""
    src_h, src_w = img.shape
    row_idx = (np.arange(rows) * src_h) // rows
    col_idx = (np.arange(cols) * src_w) // cols
    return img[np.ix_(row_idx, col_idx)]


def _correlation(patch, template):
    """Return the correlation coefficient of two equally shaped arrays.

    Returns 0.0 when either array is constant, since the coefficient is
    undefined there.
    """
    a = patch.astype(np.float64)
    a = a - a.mean()
    b = template - template.mean()
    denom = np.sqrt((a * a).sum() * (b * b).sum())
    if denom == 0:
        return 0.0
    return float((a * b).sum() / denom)


def detect_digit_template(digit_img, brightness):
    """Recognize one character by correlating a crop against fixed templates.

    The crop is resampled to each template's grid size (5x5 for digits,
    3x3 for '.') and scored with the normalized correlation coefficient.
    The first template with the highest score above 0.7 wins.

    The previous implementation passed integer 0/1 templates to
    ``cv2.matchTemplate`` together with a 0/255 image; the dtype mismatch
    raised on every call, so the function always returned ``None``.  The
    score is now computed directly, which is equivalent for equal-sized
    inputs.

    Args:
        digit_img: 2-D single-channel array holding the character crop.
        brightness: Unused; kept so existing callers keep working.

    Returns:
        The matched character ('0'-'9' or '.'), or ``None`` when the crop
        is smaller than 10x5, nothing scores above the threshold, or an
        error occurs.
    """
    try:
        h, w = digit_img.shape
        if h < 10 or w < 5:
            logging.debug("Digit image too small for template matching.")
            return None

        best_match, best_score = None, -1
        for digit, template in _DIGIT_TEMPLATES.items():
            # Resample per template so the result does not depend on the
            # order of the template dictionary.
            candidate = _resize_nearest(digit_img, *template.shape)
            score = _correlation(candidate, template)
            if score > _TEMPLATE_MATCH_THRESHOLD and score > best_score:
                best_score = score
                best_match = digit
        logging.debug(f"Template match: {best_match}, Score: {best_score:.2f}")
        return best_match if best_score > _TEMPLATE_MATCH_THRESHOLD else None
    except Exception as e:
        # Best-effort helper: callers treat None as "no digit recognized".
        logging.error(f"Template digit detection failed: {str(e)}")
        return None
206
 
207
  def perform_ocr(img, roi_bbox):
208
+ """Perform OCR with Tesseract and template-based fallback."""
209
  try:
210
  thresh, enhanced = preprocess_image(img)
211
  brightness = estimate_brightness(img)
212
  pil_img = Image.fromarray(enhanced)
213
  save_debug_image(pil_img, "07_ocr_input")
214
 
215
+ # Tesseract with flexible numeric config
216
+ custom_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'
217
  text = pytesseract.image_to_string(pil_img, config=custom_config)
218
  logging.info(f"Tesseract raw output: {text}")
219
 
 
224
  text = text.strip('.')
225
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
226
  text = text.lstrip('0') or '0'
227
+ confidence = 97.0 if len(text.replace('.', '')) >= 3 else 94.0
228
  logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
229
  return text, confidence
230
 
231
+ # Fallback to template-based detection
232
+ logging.info("Tesseract failed, using template-based detection.")
233
  contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
234
  digits_info = []
235
  for c in contours:
236
  x, y, w, h = cv2.boundingRect(c)
237
+ if w > 8 and h > 10 and 0.1 <= w/h <= 2.0:
238
  digits_info.append((x, x+w, y, y+h))
239
 
240
  if digits_info:
 
248
  continue
249
  digit_crop = thresh[y_min:y_max, x_min:x_max]
250
  save_debug_image(digit_crop, f"08_digit_crop_{idx}")
251
+ digit = detect_digit_template(digit_crop, brightness)
252
  if digit:
253
  recognized_text += digit
254
+ elif x_min - prev_x_max < 8 and prev_x_max != -float('inf'):
255
  recognized_text += '.'
256
  prev_x_max = x_max
257
 
 
261
  text = text.strip('.')
262
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
263
  text = text.lstrip('0') or '0'
264
+ confidence = 92.0 if len(text.replace('.', '')) >= 3 else 89.0
265
+ logging.info(f"Validated template text: {text}, Confidence: {confidence:.2f}%")
266
  return text, confidence
267
 
268
  logging.info("No valid digits detected.")
 
272
  return None, 0.0
273
 
274
  def extract_weight_from_image(pil_img):
275
+ """Extract weight from any digital scale image."""
276
  try:
277
  img = np.array(pil_img)
278
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
279
  save_debug_image(img, "00_input_image")
280
  img = correct_rotation(img)
281
  brightness = estimate_brightness(img)
282
+ conf_threshold = 0.8 if brightness > 100 else 0.6
283
 
284
  roi_img, roi_bbox = detect_roi(img)
285
  if roi_bbox:
286
+ conf_threshold *= 1.05 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.2) else 1.0
287
 
288
  result, confidence = perform_ocr(roi_img, roi_bbox)
289
  if result and confidence >= conf_threshold * 100:
 
298
 
299
  logging.info("Primary OCR failed, using full image fallback.")
300
  result, confidence = perform_ocr(img, None)
301
+ if result and confidence >= conf_threshold * 0.85 * 100:
302
  try:
303
  weight = float(result)
304
  if 0.01 <= weight <= 1000: