Sanjayraju30 committed on
Commit
0e2ed11
·
verified ·
1 Parent(s): 908043f

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +41 -41
ocr_engine.py CHANGED
@@ -35,22 +35,22 @@ def preprocess_image(img):
35
  """Preprocess image with aggressive contrast and noise handling."""
36
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
37
  brightness = estimate_brightness(img)
38
- # Maximum CLAHE for extreme contrast
39
- clahe_clip = 10.0 if brightness < 80 else 6.0
40
- clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(6, 6))
41
  enhanced = clahe.apply(gray)
42
  save_debug_image(enhanced, "01_preprocess_clahe")
43
- # Edge-preserving blur
44
- blurred = cv2.bilateralFilter(enhanced, 5, 75, 75)
45
  save_debug_image(blurred, "02_preprocess_blur")
46
- # Adaptive thresholding with small blocks
47
- block_size = max(5, min(15, int(img.shape[0] / 30) * 2 + 1))
48
  thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
49
- cv2.THRESH_BINARY_INV, block_size, 3)
50
- # Morphological operations for digit segmentation
51
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
52
  thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
53
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=5)
54
  save_debug_image(thresh, "03_preprocess_morph")
55
  return thresh, enhanced
56
 
@@ -58,12 +58,12 @@ def correct_rotation(img):
58
  """Correct image rotation using edge detection."""
59
  try:
60
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
61
- edges = cv2.Canny(gray, 20, 80, apertureSize=3)
62
- lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=30, minLineLength=15, maxLineGap=5)
63
  if lines is not None:
64
  angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
65
  angle = np.median(angles)
66
- if abs(angle) > 0.3:
67
  h, w = img.shape[:2]
68
  center = (w // 2, h // 2)
69
  M = cv2.getRotationMatrix2D(center, angle, 1.0)
@@ -81,15 +81,15 @@ def detect_roi(img):
81
  save_debug_image(img, "04_original")
82
  thresh, enhanced = preprocess_image(img)
83
  brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
84
- block_sizes = [max(5, min(15, int(img.shape[0] / s) * 2 + 1)) for s in [6, 10, 15]]
85
  valid_contours = []
86
  img_area = img.shape[0] * img.shape[1]
87
 
88
  for block_size in block_sizes:
89
  temp_thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
90
- cv2.THRESH_BINARY_INV, block_size, 3)
91
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
92
- temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=5)
93
  save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
94
  contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
95
 
@@ -98,15 +98,15 @@ def detect_roi(img):
98
  x, y, w, h = cv2.boundingRect(c)
99
  roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
100
  aspect_ratio = w / h
101
- if (200 < area < (img_area * 0.7) and
102
- 0.2 <= aspect_ratio <= 10.0 and w > 50 and h > 20 and roi_brightness > 40):
103
  valid_contours.append((c, area * roi_brightness))
104
  logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
105
 
106
  if valid_contours:
107
  contour, _ = max(valid_contours, key=lambda x: x[1])
108
  x, y, w, h = cv2.boundingRect(contour)
109
- padding = max(15, min(40, int(min(w, h) * 0.3)))
110
  x, y = max(0, x - padding), max(0, y - padding)
111
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
112
  roi_img = img[y:y+h, x:x+w]
@@ -123,14 +123,14 @@ def detect_roi(img):
123
  return img, None
124
 
125
  def detect_digit_template(digit_img, brightness):
126
- """Digit recognition using template matching with predefined patterns."""
127
  try:
128
  h, w = digit_img.shape
129
- if h < 10 or w < 5:
130
  logging.debug("Digit image too small for template matching.")
131
  return None
132
 
133
- # Predefined digit templates (simplified binary patterns)
134
  digit_templates = {
135
  '0': np.array([[1, 1, 1, 1, 1],
136
  [1, 0, 0, 0, 1],
@@ -143,29 +143,29 @@ def detect_digit_template(digit_img, brightness):
143
  [0, 0, 1, 0, 0],
144
  [0, 0, 1, 0, 0]]),
145
  '2': np.array([[1, 1, 1, 1, 1],
146
- [0, 0, 0, 0, 1],
147
  [1, 1, 1, 1, 1],
148
- [1, 0, 0, 0, 0],
149
  [1, 1, 1, 1, 1]]),
150
  '3': np.array([[1, 1, 1, 1, 1],
151
- [0, 0, 0, 0, 1],
152
- [1, 1, 1, 1, 1],
153
- [0, 0, 0, 0, 1],
154
  [1, 1, 1, 1, 1]]),
155
- '4': np.array([[1, 0, 0, 0, 1],
156
- [1, 0, 0, 0, 1],
157
  [1, 1, 1, 1, 1],
158
  [0, 0, 0, 0, 1],
159
  [0, 0, 0, 0, 1]]),
160
  '5': np.array([[1, 1, 1, 1, 1],
161
- [1, 0, 0, 0, 0],
162
  [1, 1, 1, 1, 1],
163
- [0, 0, 0, 0, 1],
164
  [1, 1, 1, 1, 1]]),
165
  '6': np.array([[1, 1, 1, 1, 1],
166
- [1, 0, 0, 0, 0],
167
  [1, 1, 1, 1, 1],
168
- [1, 0, 0, 0, 1],
169
  [1, 1, 1, 1, 1]]),
170
  '7': np.array([[1, 1, 1, 1, 1],
171
  [0, 0, 0, 0, 1],
@@ -180,7 +180,7 @@ def detect_digit_template(digit_img, brightness):
180
  '9': np.array([[1, 1, 1, 1, 1],
181
  [1, 0, 0, 0, 1],
182
  [1, 1, 1, 1, 1],
183
- [0, 0, 0, 0, 1],
184
  [1, 1, 1, 1, 1]]),
185
  '.': np.array([[0, 0, 0],
186
  [0, 1, 0],
@@ -195,11 +195,11 @@ def detect_digit_template(digit_img, brightness):
195
  digit_img_resized = cv2.resize(digit_img, (3, 3), interpolation=cv2.INTER_NEAREST)
196
  result = cv2.matchTemplate(digit_img_resized, template, cv2.TM_CCOEFF_NORMED)
197
  _, max_val, _, _ = cv2.minMaxLoc(result)
198
- if max_val > 0.7 and max_val > best_score:
199
  best_score = max_val
200
  best_match = digit
201
  logging.debug(f"Template match: {best_match}, Score: {best_score:.2f}")
202
- return best_match if best_score > 0.7 else None
203
  except Exception as e:
204
  logging.error(f"Template digit detection failed: {str(e)}")
205
  return None
@@ -234,7 +234,7 @@ def perform_ocr(img, roi_bbox):
234
  digits_info = []
235
  for c in contours:
236
  x, y, w, h = cv2.boundingRect(c)
237
- if w > 8 and h > 10 and 0.1 <= w/h <= 2.0:
238
  digits_info.append((x, x+w, y, y+h))
239
 
240
  if digits_info:
@@ -251,7 +251,7 @@ def perform_ocr(img, roi_bbox):
251
  digit = detect_digit_template(digit_crop, brightness)
252
  if digit:
253
  recognized_text += digit
254
- elif x_min - prev_x_max < 8 and prev_x_max != -float('inf'):
255
  recognized_text += '.'
256
  prev_x_max = x_max
257
 
@@ -279,11 +279,11 @@ def extract_weight_from_image(pil_img):
279
  save_debug_image(img, "00_input_image")
280
  img = correct_rotation(img)
281
  brightness = estimate_brightness(img)
282
- conf_threshold = 0.8 if brightness > 100 else 0.6
283
 
284
  roi_img, roi_bbox = detect_roi(img)
285
  if roi_bbox:
286
- conf_threshold *= 1.05 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.2) else 1.0
287
 
288
  result, confidence = perform_ocr(roi_img, roi_bbox)
289
  if result and confidence >= conf_threshold * 100:
@@ -298,7 +298,7 @@ def extract_weight_from_image(pil_img):
298
 
299
  logging.info("Primary OCR failed, using full image fallback.")
300
  result, confidence = perform_ocr(img, None)
301
- if result and confidence >= conf_threshold * 0.85 * 100:
302
  try:
303
  weight = float(result)
304
  if 0.01 <= weight <= 1000:
 
35
  """Preprocess image with aggressive contrast and noise handling."""
36
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
37
  brightness = estimate_brightness(img)
38
+ # Maximum CLAHE with adjusted clip for better digit enhancement
39
+ clahe_clip = 12.0 if brightness < 80 else 8.0
40
+ clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(4, 4))
41
  enhanced = clahe.apply(gray)
42
  save_debug_image(enhanced, "01_preprocess_clahe")
43
+ # Stronger edge-preserving blur
44
+ blurred = cv2.bilateralFilter(enhanced, 7, 100, 100)
45
  save_debug_image(blurred, "02_preprocess_blur")
46
+ # Adaptive thresholding with smaller blocks
47
+ block_size = max(3, min(11, int(img.shape[0] / 40) * 2 + 1))
48
  thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
49
+ cv2.THRESH_BINARY_INV, block_size, 2)
50
+ # Morphological operations for robust digit segmentation
51
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
52
  thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
53
+ thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=6)
54
  save_debug_image(thresh, "03_preprocess_morph")
55
  return thresh, enhanced
56
 
 
58
  """Correct image rotation using edge detection."""
59
  try:
60
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
61
+ edges = cv2.Canny(gray, 15, 60, apertureSize=3)
62
+ lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=20, minLineLength=10, maxLineGap=3)
63
  if lines is not None:
64
  angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
65
  angle = np.median(angles)
66
+ if abs(angle) > 0.2:
67
  h, w = img.shape[:2]
68
  center = (w // 2, h // 2)
69
  M = cv2.getRotationMatrix2D(center, angle, 1.0)
 
81
  save_debug_image(img, "04_original")
82
  thresh, enhanced = preprocess_image(img)
83
  brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
84
+ block_sizes = [max(3, min(11, int(img.shape[0] / s) * 2 + 1)) for s in [4, 8, 12]]
85
  valid_contours = []
86
  img_area = img.shape[0] * img.shape[1]
87
 
88
  for block_size in block_sizes:
89
  temp_thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
90
+ cv2.THRESH_BINARY_INV, block_size, 2)
91
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
92
+ temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=6)
93
  save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
94
  contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
95
 
 
98
  x, y, w, h = cv2.boundingRect(c)
99
  roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
100
  aspect_ratio = w / h
101
+ if (150 < area < (img_area * 0.8) and
102
+ 0.15 <= aspect_ratio <= 12.0 and w > 40 and h > 15 and roi_brightness > 30):
103
  valid_contours.append((c, area * roi_brightness))
104
  logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
105
 
106
  if valid_contours:
107
  contour, _ = max(valid_contours, key=lambda x: x[1])
108
  x, y, w, h = cv2.boundingRect(contour)
109
+ padding = max(10, min(30, int(min(w, h) * 0.25)))
110
  x, y = max(0, x - padding), max(0, y - padding)
111
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
112
  roi_img = img[y:y+h, x:x+w]
 
123
  return img, None
124
 
125
  def detect_digit_template(digit_img, brightness):
126
+ """Digit recognition using template matching with adjusted patterns."""
127
  try:
128
  h, w = digit_img.shape
129
+ if h < 8 or w < 4:
130
  logging.debug("Digit image too small for template matching.")
131
  return None
132
 
133
+ # Adjusted digit templates for seven-segment display
134
  digit_templates = {
135
  '0': np.array([[1, 1, 1, 1, 1],
136
  [1, 0, 0, 0, 1],
 
143
  [0, 0, 1, 0, 0],
144
  [0, 0, 1, 0, 0]]),
145
  '2': np.array([[1, 1, 1, 1, 1],
146
+ [0, 0, 0, 1, 1],
147
  [1, 1, 1, 1, 1],
148
+ [1, 1, 0, 0, 0],
149
  [1, 1, 1, 1, 1]]),
150
  '3': np.array([[1, 1, 1, 1, 1],
151
+ [0, 0, 0, 1, 1],
152
+ [0, 1, 1, 1, 1],
153
+ [0, 0, 0, 1, 1],
154
  [1, 1, 1, 1, 1]]),
155
+ '4': np.array([[1, 1, 0, 0, 1],
156
+ [1, 1, 0, 0, 1],
157
  [1, 1, 1, 1, 1],
158
  [0, 0, 0, 0, 1],
159
  [0, 0, 0, 0, 1]]),
160
  '5': np.array([[1, 1, 1, 1, 1],
161
+ [1, 1, 0, 0, 0],
162
  [1, 1, 1, 1, 1],
163
+ [0, 0, 0, 1, 1],
164
  [1, 1, 1, 1, 1]]),
165
  '6': np.array([[1, 1, 1, 1, 1],
166
+ [1, 1, 0, 0, 0],
167
  [1, 1, 1, 1, 1],
168
+ [1, 0, 0, 1, 1],
169
  [1, 1, 1, 1, 1]]),
170
  '7': np.array([[1, 1, 1, 1, 1],
171
  [0, 0, 0, 0, 1],
 
180
  '9': np.array([[1, 1, 1, 1, 1],
181
  [1, 0, 0, 0, 1],
182
  [1, 1, 1, 1, 1],
183
+ [0, 0, 0, 1, 1],
184
  [1, 1, 1, 1, 1]]),
185
  '.': np.array([[0, 0, 0],
186
  [0, 1, 0],
 
195
  digit_img_resized = cv2.resize(digit_img, (3, 3), interpolation=cv2.INTER_NEAREST)
196
  result = cv2.matchTemplate(digit_img_resized, template, cv2.TM_CCOEFF_NORMED)
197
  _, max_val, _, _ = cv2.minMaxLoc(result)
198
+ if max_val > 0.65 and max_val > best_score: # Lowered threshold for better match
199
  best_score = max_val
200
  best_match = digit
201
  logging.debug(f"Template match: {best_match}, Score: {best_score:.2f}")
202
+ return best_match if best_score > 0.65 else None
203
  except Exception as e:
204
  logging.error(f"Template digit detection failed: {str(e)}")
205
  return None
 
234
  digits_info = []
235
  for c in contours:
236
  x, y, w, h = cv2.boundingRect(c)
237
+ if w > 6 and h > 8 and 0.1 <= w/h <= 2.5: # Loosened size and aspect ratio
238
  digits_info.append((x, x+w, y, y+h))
239
 
240
  if digits_info:
 
251
  digit = detect_digit_template(digit_crop, brightness)
252
  if digit:
253
  recognized_text += digit
254
+ elif x_min - prev_x_max < 6 and prev_x_max != -float('inf'): # Adjusted decimal gap
255
  recognized_text += '.'
256
  prev_x_max = x_max
257
 
 
279
  save_debug_image(img, "00_input_image")
280
  img = correct_rotation(img)
281
  brightness = estimate_brightness(img)
282
+ conf_threshold = 0.75 if brightness > 100 else 0.55 # Lowered threshold
283
 
284
  roi_img, roi_bbox = detect_roi(img)
285
  if roi_bbox:
286
+ conf_threshold *= 1.05 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.15) else 1.0
287
 
288
  result, confidence = perform_ocr(roi_img, roi_bbox)
289
  if result and confidence >= conf_threshold * 100:
 
298
 
299
  logging.info("Primary OCR failed, using full image fallback.")
300
  result, confidence = perform_ocr(img, None)
301
+ if result and confidence >= conf_threshold * 0.8 * 100: # Adjusted fallback threshold
302
  try:
303
  weight = float(result)
304
  if 0.01 <= weight <= 1000: