Sanjayraju30 committed on
Commit
6d57019
·
verified ·
1 Parent(s): 0c65757

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +79 -87
ocr_engine.py CHANGED
@@ -32,25 +32,25 @@ def estimate_brightness(img):
32
  return np.mean(gray)
33
 
34
def preprocess_image(img):
    """Preprocess a BGR frame for OCR: contrast boost, denoise, binarize.

    Returns (thresh, enhanced): the inverted binary image and the
    contrast-enhanced grayscale it was computed from.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    brightness = estimate_brightness(img)
    # Stronger CLAHE clip on dim frames, gentler on bright ones.
    clip = 5.0 if brightness < 80 else 3.0
    enhanced = cv2.createCLAHE(clipLimit=clip, tileGridSize=(8, 8)).apply(gray)
    save_debug_image(enhanced, "01_preprocess_clahe")
    # Light Gaussian smoothing before thresholding.
    blurred = cv2.GaussianBlur(enhanced, (3, 3), 0)
    save_debug_image(blurred, "02_preprocess_blur")
    # Adaptive-threshold window scales with image height; kept odd in [11, 31].
    window = max(11, min(31, int(img.shape[0] / 15) * 2 + 1))
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, window, 5)
    # Open to drop speckle noise, then close to bridge gaps in strokes.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
    save_debug_image(thresh, "03_preprocess_morph")
    return thresh, enhanced
56
 
@@ -58,12 +58,12 @@ def correct_rotation(img):
58
  """Correct image rotation using edge detection."""
59
  try:
60
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
61
- edges = cv2.Canny(gray, 50, 150, apertureSize=3)
62
- lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=50, minLineLength=30, maxLineGap=10)
63
  if lines is not None:
64
  angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
65
  angle = np.median(angles)
66
- if abs(angle) > 1.0:
67
  h, w = img.shape[:2]
68
  center = (w // 2, h // 2)
69
  M = cv2.getRotationMatrix2D(center, angle, 1.0)
@@ -76,20 +76,20 @@ def correct_rotation(img):
76
  return img
77
 
78
  def detect_roi(img):
79
- """Detect region of interest (display) with multi-scale contour filtering."""
80
  try:
81
  save_debug_image(img, "04_original")
82
  thresh, enhanced = preprocess_image(img)
83
  brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
84
- block_sizes = [max(11, min(31, int(img.shape[0] / s) * 2 + 1)) for s in [12, 15, 18]]
85
  valid_contours = []
86
  img_area = img.shape[0] * img.shape[1]
87
 
88
  for block_size in block_sizes:
89
  temp_thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
90
- cv2.THRESH_BINARY_INV, block_size, 5)
91
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
92
- temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
93
  save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
94
  contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
95
 
@@ -98,15 +98,15 @@ def detect_roi(img):
98
  x, y, w, h = cv2.boundingRect(c)
99
  roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
100
  aspect_ratio = w / h
101
- if (400 < area < (img_area * 0.6) and
102
- 0.5 <= aspect_ratio <= 8.0 and w > 70 and h > 30 and roi_brightness > 50):
103
  valid_contours.append((c, area * roi_brightness))
104
  logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
105
 
106
  if valid_contours:
107
  contour, _ = max(valid_contours, key=lambda x: x[1])
108
  x, y, w, h = cv2.boundingRect(contour)
109
- padding = max(20, min(60, int(min(w, h) * 0.4)))
110
  x, y = max(0, x - padding), max(0, y - padding)
111
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
112
  roi_img = img[y:y+h, x:x+w]
@@ -122,103 +122,93 @@ def detect_roi(img):
122
  save_debug_image(img, "06_roi_error_fallback")
123
  return img, None
124
 
125
def detect_segments(digit_img, brightness):
    """Classify a binarized digit crop by probing seven-segment regions.

    Args:
        digit_img: 2-D array of a single digit, white strokes (255) on black.
        brightness: mean frame brightness; dark frames get a looser threshold.

    Returns:
        The best-matching digit as a one-character string, or None if the
        crop is too small or classification raises.
    """
    try:
        h, w = digit_img.shape
        if h < 15 or w < 8:
            logging.debug("Digit image too small for segment detection.")
            return None

        # A segment counts as "on" when its white-pixel fraction exceeds this.
        segment_threshold = 0.25 if brightness < 80 else 0.35
        # Probe boxes as (x1, x2, y1, y2) in pixel coordinates.
        segments = {
            'top': (int(w*0.1), int(w*0.9), 0, int(h*0.25)),
            'middle': (int(w*0.1), int(w*0.9), int(h*0.45), int(h*0.55)),
            'bottom': (int(w*0.1), int(w*0.9), int(h*0.75), h),
            'left_top': (0, int(w*0.3), int(h*0.1), int(h*0.5)),
            'left_bottom': (0, int(w*0.3), int(h*0.5), int(h*0.9)),
            'right_top': (int(w*0.7), w, int(h*0.1), int(h*0.5)),
            'right_bottom': (int(w*0.7), w, int(h*0.5), int(h*0.9))
        }

        segment_presence = {}
        for name, (x1, x2, y1, y2) in segments.items():
            # Clamp the probe box to the crop bounds.
            x1, x2 = max(0, x1), min(w, x2)
            y1, y2 = max(0, y1), min(h, y2)
            region = digit_img[y1:y2, x1:x2]
            if region.size == 0:
                segment_presence[name] = False
                continue
            pixel_count = np.sum(region == 255)
            total_pixels = region.size
            segment_presence[name] = pixel_count / total_pixels > segment_threshold
            logging.debug(f"Segment {name}: {pixel_count}/{total_pixels} = {pixel_count/total_pixels:.2f}")

        # Which segments are lit for each digit on a seven-segment display.
        digit_patterns = {
            '0': ('top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
            '1': ('right_top', 'right_bottom'),
            '2': ('top', 'middle', 'bottom', 'left_bottom', 'right_top'),
            '3': ('top', 'middle', 'bottom', 'right_top', 'right_bottom'),
            '4': ('middle', 'left_top', 'right_top', 'right_bottom'),
            '5': ('top', 'middle', 'bottom', 'left_top', 'right_bottom'),
            '6': ('top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_bottom'),
            '7': ('top', 'right_top', 'right_bottom'),
            '8': ('top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
            '9': ('top', 'middle', 'bottom', 'left_top', 'right_top', 'right_bottom')
        }

        # Score = lit segments in pattern, minus a small penalty for strays,
        # plus a bonus when most of the pattern is lit. First max wins.
        best_match, best_score = None, -1
        for digit, pattern in digit_patterns.items():
            matches = sum(1 for seg in pattern if segment_presence.get(seg, False))
            non_matches = sum(1 for seg in segment_presence
                              if seg not in pattern and segment_presence[seg])
            score = matches - 0.15 * non_matches
            if matches >= len(pattern) * 0.65:
                score += 1.2
            if score > best_score:
                best_match, best_score = digit, score
        logging.debug(f"Segment detection: {segment_presence}, Digit: {best_match}, Score: {best_score:.2f}")
        return best_match
    except Exception as e:
        logging.error(f"Segment detection failed: {str(e)}")
        return None
185
 
186
  def perform_ocr(img, roi_bbox):
187
- """Perform OCR with Tesseract and seven-segment fallback."""
188
  try:
189
  thresh, enhanced = preprocess_image(img)
190
  brightness = estimate_brightness(img)
191
  pil_img = Image.fromarray(enhanced)
192
  save_debug_image(pil_img, "07_ocr_input")
193
 
194
- # Tesseract OCR with numeric config
195
  custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
196
  text = pytesseract.image_to_string(pil_img, config=custom_config)
197
  logging.info(f"Tesseract raw output: {text}")
198
 
199
- # Clean and validate text
200
  text = re.sub(r"[^\d\.]", "", text)
201
  if text.count('.') > 1:
202
  text = text.replace('.', '', text.count('.') - 1)
203
  text = text.strip('.')
204
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
205
  text = text.lstrip('0') or '0'
206
- confidence = 95.0 if len(text.replace('.', '')) >= 2 else 90.0
207
  logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
208
  return text, confidence
209
 
210
- # Fallback to seven-segment detection
211
- logging.info("Tesseract failed, using seven-segment detection.")
212
  contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
213
  digits_info = []
214
  for c in contours:
215
  x, y, w, h = cv2.boundingRect(c)
216
- if w > 10 and h > 15 and 0.2 <= w/h <= 1.5:
217
  digits_info.append((x, x+w, y, y+h))
218
 
219
  if digits_info:
220
  digits_info.sort(key=lambda x: x[0])
221
  recognized_text = ""
 
222
  for idx, (x_min, x_max, y_min, y_max) in enumerate(digits_info):
223
  x_min, y_min = max(0, x_min), max(0, y_min)
224
  x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
@@ -226,19 +216,21 @@ def perform_ocr(img, roi_bbox):
226
  continue
227
  digit_crop = thresh[y_min:y_max, x_min:x_max]
228
  save_debug_image(digit_crop, f"08_digit_crop_{idx}")
229
- segment_digit = detect_segments(digit_crop, brightness)
230
- if segment_digit:
231
- recognized_text += segment_digit
232
- elif idx < len(digits_info) - 1 and (digits_info[idx+1][0] - x_max) < 10:
233
- recognized_text += '.' # Assume decimal point for close digits
 
 
234
  text = re.sub(r"[^\d\.]", "", recognized_text)
235
  if text.count('.') > 1:
236
  text = text.replace('.', '', text.count('.') - 1)
237
  text = text.strip('.')
238
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
239
  text = text.lstrip('0') or '0'
240
- confidence = 90.0
241
- logging.info(f"Validated segment text: {text}, Confidence: {confidence:.2f}%")
242
  return text, confidence
243
 
244
  logging.info("No valid digits detected.")
@@ -255,11 +247,11 @@ def extract_weight_from_image(pil_img):
255
  save_debug_image(img, "00_input_image")
256
  img = correct_rotation(img)
257
  brightness = estimate_brightness(img)
258
- conf_threshold = 0.8 if brightness > 100 else 0.6
259
 
260
  roi_img, roi_bbox = detect_roi(img)
261
  if roi_bbox:
262
- conf_threshold *= 1.1 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.3) else 1.0
263
 
264
  result, confidence = perform_ocr(roi_img, roi_bbox)
265
  if result and confidence >= conf_threshold * 100:
@@ -274,7 +266,7 @@ def extract_weight_from_image(pil_img):
274
 
275
  logging.info("Primary OCR failed, using full image fallback.")
276
  result, confidence = perform_ocr(img, None)
277
- if result and confidence >= conf_threshold * 0.9 * 100:
278
  try:
279
  weight = float(result)
280
  if 0.01 <= weight <= 1000:
 
32
  return np.mean(gray)
33
 
34
def preprocess_image(img):
    """Binarize a frame for OCR with aggressive contrast and noise reduction.

    Returns (thresh, enhanced): the inverted binary image and the
    contrast-enhanced grayscale it was derived from.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Darker frames get a larger CLAHE clip limit.
    clip_limit = 6.0 if estimate_brightness(img) < 80 else 4.0
    enhanced = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8, 8)).apply(gray)
    save_debug_image(enhanced, "01_preprocess_clahe")
    # Minimal blur: suppress sensor noise without softening digit edges.
    blurred = cv2.GaussianBlur(enhanced, (3, 3), 0)
    save_debug_image(blurred, "02_preprocess_blur")
    # Odd threshold window in [9, 25], scaled to image height.
    win = max(9, min(25, int(img.shape[0] / 20) * 2 + 1))
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, win, 7)
    # Open removes speckles, close fills gaps inside digit strokes.
    box = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, box, iterations=2)
    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, box, iterations=3)
    save_debug_image(thresh, "03_preprocess_morph")
    return thresh, enhanced
56
 
 
58
  """Correct image rotation using edge detection."""
59
  try:
60
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
61
+ edges = cv2.Canny(gray, 30, 100, apertureSize=3)
62
+ lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=40, minLineLength=20, maxLineGap=10)
63
  if lines is not None:
64
  angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
65
  angle = np.median(angles)
66
+ if abs(angle) > 0.5:
67
  h, w = img.shape[:2]
68
  center = (w // 2, h // 2)
69
  M = cv2.getRotationMatrix2D(center, angle, 1.0)
 
76
  return img
77
 
78
  def detect_roi(img):
79
+ """Detect region of interest with aggressive contour filtering."""
80
  try:
81
  save_debug_image(img, "04_original")
82
  thresh, enhanced = preprocess_image(img)
83
  brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
84
+ block_sizes = [max(9, min(25, int(img.shape[0] / s) * 2 + 1)) for s in [10, 15, 20]]
85
  valid_contours = []
86
  img_area = img.shape[0] * img.shape[1]
87
 
88
  for block_size in block_sizes:
89
  temp_thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
90
+ cv2.THRESH_BINARY_INV, block_size, 7)
91
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
92
+ temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
93
  save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
94
  contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
95
 
 
98
  x, y, w, h = cv2.boundingRect(c)
99
  roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
100
  aspect_ratio = w / h
101
+ if (500 < area < (img_area * 0.5) and
102
+ 0.5 <= aspect_ratio <= 6.0 and w > 80 and h > 40 and roi_brightness > 60):
103
  valid_contours.append((c, area * roi_brightness))
104
  logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
105
 
106
  if valid_contours:
107
  contour, _ = max(valid_contours, key=lambda x: x[1])
108
  x, y, w, h = cv2.boundingRect(contour)
109
+ padding = max(25, min(70, int(min(w, h) * 0.5)))
110
  x, y = max(0, x - padding), max(0, y - padding)
111
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
112
  roi_img = img[y:y+h, x:x+w]
 
122
  save_debug_image(img, "06_roi_error_fallback")
123
  return img, None
124
 
125
def detect_digit_contour(digit_img, brightness):
    """Heuristically map a binarized digit crop to a digit character.

    Uses the dominant external contour's aspect ratio and fill ratio plus
    the crop's overall white-pixel density.

    Args:
        digit_img: 2-D array, white strokes (255) on black.
        brightness: accepted for interface parity with the segment detector
            but currently unused in this implementation.

    Returns:
        A one-character digit string, or None when no bucket matches.
    """
    try:
        h, w = digit_img.shape
        if h < 20 or w < 10:
            logging.debug("Digit image too small for contour detection.")
            return None

        # Reject crops that are nearly empty or nearly solid.
        pixel_count = np.sum(digit_img == 255)
        density = pixel_count / digit_img.size
        if not (0.1 <= density <= 0.8):
            return None

        # Dominant external contour drives the shape features.
        contours, _ = cv2.findContours(digit_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if not contours:
            return None

        biggest = max(contours, key=cv2.contourArea)
        _, _, cw, ch = cv2.boundingRect(biggest)
        if cw < 5 or ch < 10:
            return None

        aspect = cw / ch
        area_ratio = cv2.contourArea(biggest) / (cw * ch)

        # Coarse shape buckets, checked in order; first hit wins.
        if 0.2 < aspect < 0.4 and area_ratio > 0.5:
            return '1'
        if aspect > 0.5 and area_ratio > 0.6:
            if density > 0.5:
                return '8'
            return '0' if density > 0.3 else None
        if aspect > 0.4 and area_ratio > 0.5:
            if density > 0.4:
                return '3'
            return '2' if density > 0.3 else None
        if aspect > 0.3 and area_ratio > 0.4:
            return '5' if density > 0.3 else '7'
        if aspect > 0.2 and area_ratio > 0.3:
            return '4' if density > 0.2 else '9'
        return None
    except Exception as e:
        logging.error(f"Contour digit detection failed: {str(e)}")
        return None
174
 
175
  def perform_ocr(img, roi_bbox):
176
+ """Perform OCR with Tesseract and contour-based fallback."""
177
  try:
178
  thresh, enhanced = preprocess_image(img)
179
  brightness = estimate_brightness(img)
180
  pil_img = Image.fromarray(enhanced)
181
  save_debug_image(pil_img, "07_ocr_input")
182
 
183
+ # Tesseract with aggressive numeric config
184
  custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
185
  text = pytesseract.image_to_string(pil_img, config=custom_config)
186
  logging.info(f"Tesseract raw output: {text}")
187
 
188
+ # Clean and validate
189
  text = re.sub(r"[^\d\.]", "", text)
190
  if text.count('.') > 1:
191
  text = text.replace('.', '', text.count('.') - 1)
192
  text = text.strip('.')
193
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
194
  text = text.lstrip('0') or '0'
195
+ confidence = 98.0 if len(text.replace('.', '')) >= 3 else 95.0
196
  logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
197
  return text, confidence
198
 
199
+ # Fallback to contour-based detection
200
+ logging.info("Tesseract failed, using contour-based detection.")
201
  contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
202
  digits_info = []
203
  for c in contours:
204
  x, y, w, h = cv2.boundingRect(c)
205
+ if w > 15 and h > 20 and 0.2 <= w/h <= 1.2:
206
  digits_info.append((x, x+w, y, y+h))
207
 
208
  if digits_info:
209
  digits_info.sort(key=lambda x: x[0])
210
  recognized_text = ""
211
+ prev_x_max = -float('inf')
212
  for idx, (x_min, x_max, y_min, y_max) in enumerate(digits_info):
213
  x_min, y_min = max(0, x_min), max(0, y_min)
214
  x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
 
216
  continue
217
  digit_crop = thresh[y_min:y_max, x_min:x_max]
218
  save_debug_image(digit_crop, f"08_digit_crop_{idx}")
219
+ digit = detect_digit_contour(digit_crop, brightness)
220
+ if digit:
221
+ recognized_text += digit
222
+ elif x_min - prev_x_max < 15 and prev_x_max != -float('inf'):
223
+ recognized_text += '.'
224
+ prev_x_max = x_max
225
+
226
  text = re.sub(r"[^\d\.]", "", recognized_text)
227
  if text.count('.') > 1:
228
  text = text.replace('.', '', text.count('.') - 1)
229
  text = text.strip('.')
230
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
231
  text = text.lstrip('0') or '0'
232
+ confidence = 92.0 if len(text.replace('.', '')) >= 3 else 90.0
233
+ logging.info(f"Validated contour text: {text}, Confidence: {confidence:.2f}%")
234
  return text, confidence
235
 
236
  logging.info("No valid digits detected.")
 
247
  save_debug_image(img, "00_input_image")
248
  img = correct_rotation(img)
249
  brightness = estimate_brightness(img)
250
+ conf_threshold = 0.9 if brightness > 100 else 0.7
251
 
252
  roi_img, roi_bbox = detect_roi(img)
253
  if roi_bbox:
254
+ conf_threshold *= 1.15 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.3) else 1.0
255
 
256
  result, confidence = perform_ocr(roi_img, roi_bbox)
257
  if result and confidence >= conf_threshold * 100:
 
266
 
267
  logging.info("Primary OCR failed, using full image fallback.")
268
  result, confidence = perform_ocr(img, None)
269
+ if result and confidence >= conf_threshold * 0.95 * 100:
270
  try:
271
  weight = float(result)
272
  if 0.01 <= weight <= 1000: