Sanjayraju30 committed on
Commit
ef265f2
·
verified ·
1 Parent(s): 58fea44

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +44 -371
ocr_engine.py CHANGED
@@ -2,306 +2,54 @@ import pytesseract
2
  import numpy as np
3
  import cv2
4
  import re
5
- import logging
6
- from datetime import datetime
7
- import os
8
  from PIL import Image
 
9
 
10
  # Set up logging
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
 
13
- # Directory for debug images
14
- DEBUG_DIR = "debug_images"
15
- os.makedirs(DEBUG_DIR, exist_ok=True)
16
-
17
- def save_debug_image(img, filename_suffix, prefix=""):
18
- """Save image to debug directory with timestamp."""
19
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
20
- filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
21
- if isinstance(img, Image.Image):
22
- img.save(filename)
23
- elif len(img.shape) == 3:
24
- cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
25
- else:
26
- cv2.imwrite(filename, img)
27
- logging.info(f"Saved debug image: {filename}")
28
-
29
- def estimate_brightness(img):
30
- """Estimate image brightness."""
31
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
32
- return np.mean(gray)
33
-
34
  def preprocess_image(img):
35
- """Preprocess image with simplified, robust contrast enhancement."""
36
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
37
- brightness = estimate_brightness(img)
38
-
39
- # Apply mild CLAHE for contrast
40
- clahe_clip = 8.0 if brightness < 90 else 4.0
41
- clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
42
- enhanced = clahe.apply(gray)
43
- save_debug_image(enhanced, "01_preprocess_clahe")
44
-
45
- # Light blur to reduce noise
46
- blurred = cv2.GaussianBlur(enhanced, (5, 5), 0)
47
- save_debug_image(blurred, "02_preprocess_blur")
48
-
49
- # Dynamic thresholding with larger block size for small displays
50
- block_size = max(7, min(31, int(img.shape[0] / 20) * 2 + 1))
51
- thresh = cv2.adaptiveThreshold(
52
- blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
53
- cv2.THRESH_BINARY_INV, block_size, 3
54
- )
55
-
56
- # Minimal morphological operations
57
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
58
- thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
59
- save_debug_image(thresh, "03_preprocess_morph")
60
- return thresh, enhanced
61
-
62
- def correct_rotation(img):
63
- """Correct image rotation using edge detection."""
64
  try:
 
 
 
 
 
65
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
66
- edges = cv2.Canny(gray, 30, 100, apertureSize=3)
67
- lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=25, minLineLength=15, maxLineGap=10)
68
- if lines is not None:
69
- angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
70
- angle = np.median(angles)
71
- if abs(angle) > 0.3:
72
- h, w = img.shape[:2]
73
- center = (w // 2, h // 2)
74
- M = cv2.getRotationMatrix2D(center, angle, 1.0)
75
- img = cv2.warpAffine(img, M, (w, h))
76
- save_debug_image(img, "00_rotated_image")
77
- logging.info(f"Applied rotation: {angle:.2f} degrees")
78
- return img
79
- except Exception as e:
80
- logging.error(f"Rotation correction failed: {str(e)}")
81
- return img
82
-
83
- def detect_roi(img):
84
- """Detect region of interest with broader contour analysis."""
85
- try:
86
- save_debug_image(img, "04_original")
87
- thresh, enhanced = preprocess_image(img)
88
- brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
89
- block_sizes = [max(7, min(31, int(img.shape[0] / s) * 2 + 1)) for s in [5, 10, 20]]
90
- valid_contours = []
91
- img_area = img.shape[0] * img.shape[1]
92
 
93
- for block_size in block_sizes:
94
- temp_thresh = cv2.adaptiveThreshold(
95
- enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
96
- cv2.THRESH_BINARY_INV, block_size, 3
97
- )
98
- kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
99
- temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
100
- save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
101
- contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
102
-
103
- for c in contours:
104
- area = cv2.contourArea(c)
105
- x, y, w, h = cv2.boundingRect(c)
106
- roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
107
- aspect_ratio = w / h
108
- if (50 < area < (img_area * 0.95) and
109
- 0.05 <= aspect_ratio <= 20.0 and w > 20 and h > 8 and roi_brightness > 15):
110
- valid_contours.append((c, area * roi_brightness))
111
- logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
112
 
113
- if valid_contours:
114
- contour, _ = max(valid_contours, key=lambda x: x[1])
115
- x, y, w, h = cv2.boundingRect(contour)
116
- padding = max(5, min(20, int(min(w, h) * 0.4)))
117
- x, y = max(0, x - padding), max(0, y - padding)
118
- w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
119
- roi_img = img[y:y+h, x:x+w]
120
- save_debug_image(roi_img, "06_detected_roi")
121
- logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
122
- return roi_img, (x, y, w, h)
123
 
124
- logging.info("No ROI found, using full image.")
125
- save_debug_image(img, "06_no_roi_fallback")
126
- return img, None
127
- except Exception as e:
128
- logging.error(f"ROI detection failed: {str(e)}")
129
- save_debug_image(img, "06_roi_error_fallback")
130
- return img, None
131
-
132
- def detect_digit_template(digit_img, brightness):
133
- """Digit recognition with expanded template matching."""
134
- try:
135
- h, w = digit_img.shape
136
- if h < 5 or w < 2:
137
- logging.debug("Digit image too small for template matching.")
138
- return None
139
-
140
- # Expanded digit templates for seven-segment display variations
141
- digit_templates = {
142
- '0': [
143
- np.array([[1, 1, 1, 1, 1],
144
- [1, 0, 0, 0, 1],
145
- [1, 0, 0, 0, 1],
146
- [1, 0, 0, 0, 1],
147
- [1, 1, 1, 1, 1]], dtype=np.float32),
148
- np.array([[1, 1, 1, 1],
149
- [1, 0, 0, 1],
150
- [1, 0, 0, 1],
151
- [1, 0, 0, 1],
152
- [1, 1, 1, 1]], dtype=np.float32)
153
- ],
154
- '1': [
155
- np.array([[0, 0, 1, 0, 0],
156
- [0, 0, 1, 0, 0],
157
- [0, 0, 1, 0, 0],
158
- [0, 0, 1, 0, 0],
159
- [0, 0, 1, 0, 0]], dtype=np.float32),
160
- np.array([[0, 1, 0],
161
- [0, 1, 0],
162
- [0, 1, 0],
163
- [0, 1, 0],
164
- [0, 1, 0]], dtype=np.float32)
165
- ],
166
- '2': [
167
- np.array([[1, 1, 1, 1, 1],
168
- [0, 0, 0, 1, 1],
169
- [1, 1, 1, 1, 1],
170
- [1, 1, 0, 0, 0],
171
- [1, 1, 1, 1, 1]], dtype=np.float32),
172
- np.array([[1, 1, 1, 1],
173
- [0, 0, 1, 1],
174
- [1, 1, 1, 1],
175
- [1, 1, 0, 0],
176
- [1, 1, 1, 1]], dtype=np.float32)
177
- ],
178
- '3': [
179
- np.array([[1, 1, 1, 1, 1],
180
- [0, 0, 0, 1, 1],
181
- [1, 1, 1, 1, 1],
182
- [0, 0, 0, 1, 1],
183
- [1, 1, 1, 1, 1]], dtype=np.float32),
184
- np.array([[1, 1, 1, 1],
185
- [0, 0, 1, 1],
186
- [1, 1, 1, 1],
187
- [0, 0, 1, 1],
188
- [1, 1, 1, 1]], dtype=np.float32)
189
- ],
190
- '4': [
191
- np.array([[1, 1, 0, 0, 1],
192
- [1, 1, 0, 0, 1],
193
- [1, 1, 1, 1, 1],
194
- [0, 0, 0, 0, 1],
195
- [0, 0, 0, 0, 1]], dtype=np.float32),
196
- np.array([[1, 0, 0, 1],
197
- [1, 0, 0, 1],
198
- [1, 1, 1, 1],
199
- [0, 0, 0, 1],
200
- [0, 0, 0, 1]], dtype=np.float32)
201
- ],
202
- '5': [
203
- np.array([[1, 1, 1, 1, 1],
204
- [1, 1, 0, 0, 0],
205
- [1, 1, 1, 1, 1],
206
- [0, 0, 0, 1, 1],
207
- [1, 1, 1, 1, 1]], dtype=np.float32),
208
- np.array([[1, 1, 1, 1],
209
- [1, 1, 0, 0],
210
- [1, 1, 1, 1],
211
- [0, 0, 1, 1],
212
- [1, 1, 1, 1]], dtype=np.float32)
213
- ],
214
- '6': [
215
- np.array([[1, 1, 1, 1, 1],
216
- [1, 1, 0, 0, 0],
217
- [1, 1, 1, 1, 1],
218
- [1, 0, 0, 1, 1],
219
- [1, 1, 1, 1, 1]], dtype=np.float32),
220
- np.array([[1, 1, 1, 1],
221
- [1, 1, 0, 0],
222
- [1, 1, 1, 1],
223
- [1, 0, 1, 1],
224
- [1, 1, 1, 1]], dtype=np.float32)
225
- ],
226
- '7': [
227
- np.array([[1, 1, 1, 1, 1],
228
- [0, 0, 0, 0, 1],
229
- [0, 0, 0, 0, 1],
230
- [0, 0, 0, 0, 1],
231
- [0, 0, 0, 0, 1]], dtype=np.float32),
232
- np.array([[1, 1, 1, 1],
233
- [0, 0, 0, 1],
234
- [0, 0, 0, 1],
235
- [0, 0, 0, 1],
236
- [0, 0, 0, 1]], dtype=np.float32)
237
- ],
238
- '8': [
239
- np.array([[1, 1, 1, 1, 1],
240
- [1, 0, 0, 0, 1],
241
- [1, 1, 1, 1, 1],
242
- [1, 0, 0, 0, 1],
243
- [1, 1, 1, 1, 1]], dtype=np.float32),
244
- np.array([[1, 1, 1, 1],
245
- [1, 0, 0, 1],
246
- [1, 1, 1, 1],
247
- [1, 0, 0, 1],
248
- [1, 1, 1, 1]], dtype=np.float32)
249
- ],
250
- '9': [
251
- np.array([[1, 1, 1, 1, 1],
252
- [1, 0, 0, 0, 1],
253
- [1, 1, 1, 1, 1],
254
- [0, 0, 0, 1, 1],
255
- [1, 1, 1, 1, 1]], dtype=np.float32),
256
- np.array([[1, 1, 1, 1],
257
- [1, 0, 0, 1],
258
- [1, 1, 1, 1],
259
- [0, 0, 1, 1],
260
- [1, 1, 1, 1]], dtype=np.float32)
261
- ],
262
- '.': [
263
- np.array([[0, 0, 0],
264
- [0, 1, 0],
265
- [0, 0, 0]], dtype=np.float32),
266
- np.array([[0, 0],
267
- [1, 0],
268
- [0, 0]], dtype=np.float32)
269
- ]
270
- }
271
-
272
- # Try multiple sizes for digit image
273
- sizes = [(5, 5), (4, 4), (3, 3)] if h > w else [(3, 3), (2, 2)]
274
- best_match, best_score = None, -1
275
- for size in sizes:
276
- digit_img_resized = cv2.resize(digit_img, size, interpolation=cv2.INTER_AREA)
277
- digit_img_resized = (digit_img_resized > 100).astype(np.float32) # Binarize
278
-
279
- for digit, templates in digit_templates.items():
280
- for template in templates:
281
- if digit == '.' and size[0] > 3:
282
- continue
283
- if digit != '.' and size[0] <= 3:
284
- continue
285
- if template.shape[0] != size[0] or template.shape[1] != size[1]:
286
- continue
287
- result = cv2.matchTemplate(digit_img_resized, template, cv2.TM_CCOEFF_NORMED)
288
- _, max_val, _, _ = cv2.minMaxLoc(result)
289
- if max_val > 0.55 and max_val > best_score: # Further lowered threshold
290
- best_score = max_val
291
- best_match = digit
292
- logging.debug(f"Template match: {best_match}, Score: {best_score:.2f}")
293
- return best_match if best_score > 0.55 else None
294
  except Exception as e:
295
- logging.error(f"Template digit detection failed: {str(e)}")
296
- return None
297
 
298
- def perform_ocr(img, roi_bbox):
299
- """Perform OCR with Tesseract and robust template fallback."""
300
  try:
301
- thresh, enhanced = preprocess_image(img)
302
- brightness = estimate_brightness(img)
303
- pil_img = Image.fromarray(enhanced)
304
- save_debug_image(pil_img, "07_ocr_input")
 
 
305
 
306
  # Try multiple Tesseract configurations
307
  configs = [
@@ -309,8 +57,10 @@ def perform_ocr(img, roi_bbox):
309
  r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.' # Block of text
310
  ]
311
  for config in configs:
312
- text = pytesseract.image_to_string(pil_img, config=config)
313
  logging.info(f"Tesseract raw output (config {config}): {text}")
 
 
314
  text = re.sub(r"[^\d\.]", "", text)
315
  if text.count('.') > 1:
316
  text = text.replace('.', '', text.count('.') - 1)
@@ -318,91 +68,14 @@ def perform_ocr(img, roi_bbox):
318
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
319
  text = text.lstrip('0') or '0'
320
  confidence = 95.0 if len(text.replace('.', '')) >= 3 else 90.0
321
- logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
322
- return text, confidence
323
-
324
- # Fallback to template-based detection
325
- logging.info("Tesseract failed, using template-based detection.")
326
- contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
327
- digits_info = []
328
- for c in contours:
329
- x, y, w, h = cv2.boundingRect(c)
330
- if w > 4 and h > 5 and 0.03 <= w/h <= 4.0:
331
- digits_info.append((x, x+w, y, y+h))
332
-
333
- if digits_info:
334
- digits_info.sort(key=lambda x: x[0])
335
- recognized_text = ""
336
- prev_x_max = -float('inf')
337
- for idx, (x_min, x_max, y_min, y_max) in enumerate(digits_info):
338
- x_min, y_min = max(0, x_min), max(0, y_min)
339
- x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
340
- if x_max <= x_min or y_max <= y_min:
341
- continue
342
- digit_crop = thresh[y_min:y_max, x_min:x_max]
343
- save_debug_image(digit_crop, f"08_digit_crop_{idx}")
344
- digit = detect_digit_template(digit_crop, brightness)
345
- if digit:
346
- recognized_text += digit
347
- elif x_min - prev_x_max < 10 and prev_x_max != -float('inf'):
348
- recognized_text += '.'
349
- prev_x_max = x_max
350
-
351
- text = re.sub(r"[^\d\.]", "", recognized_text)
352
- if text.count('.') > 1:
353
- text = text.replace('.', '', text.count('.') - 1)
354
- text = text.strip('.')
355
- if text and re.fullmatch(r"^\d*\.?\d*$", text):
356
- text = text.lstrip('0') or '0'
357
- confidence = 90.0 if len(text.replace('.', '')) >= 3 else 85.0
358
- logging.info(f"Validated template text: {text}, Confidence: {confidence:.2f}%")
359
- return text, confidence
360
 
361
- logging.info("No valid digits detected.")
362
- return None, 0.0
363
- except Exception as e:
364
- logging.error(f"OCR failed: {str(e)}")
365
- return None, 0.0
366
-
367
- def extract_weight_from_image(pil_img):
368
- """Extract weight from any digital scale image."""
369
- try:
370
- img = np.array(pil_img)
371
- img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
372
- save_debug_image(img, "00_input_image")
373
- img = correct_rotation(img)
374
- brightness = estimate_brightness(img)
375
- conf_threshold = 0.65 if brightness > 70 else 0.45
376
-
377
- # Try ROI-based detection
378
- roi_img, roi_bbox = detect_roi(img)
379
- if roi_bbox:
380
- conf_threshold *= 1.15 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.05) else 1.0
381
-
382
- result, confidence = perform_ocr(roi_img, roi_bbox)
383
- if result and confidence >= conf_threshold * 100:
384
- try:
385
- weight = float(result)
386
- if 0.001 <= weight <= 5000:
387
- logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
388
- return result, confidence
389
- logging.warning(f"Weight {result} out of range.")
390
- except ValueError:
391
- logging.warning(f"Invalid weight format: {result}")
392
-
393
- # Full image fallback
394
- logging.info("Primary OCR failed, using full image fallback.")
395
- result, confidence = perform_ocr(img, None)
396
- if result and confidence >= conf_threshold * 0.85 * 100:
397
- try:
398
- weight = float(result)
399
- if 0.001 <= weight <= 5000:
400
- logging.info(f"Full image weight: {result} kg, Confidence: {confidence:.2f}%")
401
- return result, confidence
402
- logging.warning(f"Full image weight {result} out of range.")
403
- except ValueError:
404
- logging.warning(f"Invalid full image weight format: {result}")
405
-
406
  logging.info("No valid weight detected.")
407
  return "Not detected", 0.0
408
  except Exception as e:
 
2
  import numpy as np
3
  import cv2
4
  import re
 
 
 
5
  from PIL import Image
6
+ import logging
7
 
8
  # Set up logging
9
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def preprocess_image(img):
12
+ """Preprocess image for robust OCR."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  try:
14
+ # Convert to OpenCV format
15
+ img = np.array(img)
16
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
17
+
18
+ # Convert to grayscale
19
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ # Estimate brightness for adaptive processing
22
+ brightness = np.mean(gray)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
+ # Apply CLAHE for contrast enhancement
25
+ clahe_clip = 4.0 if brightness < 100 else 2.0
26
+ clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
27
+ enhanced = clahe.apply(gray)
 
 
 
 
 
 
28
 
29
+ # Apply adaptive thresholding
30
+ block_size = max(11, min(31, int(img.shape[0] / 20) * 2 + 1))
31
+ thresh = cv2.adaptiveThreshold(
32
+ enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, block_size, 2
33
+ )
34
+
35
+ # Noise reduction
36
+ kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
37
+ thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
38
+
39
+ return thresh
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  except Exception as e:
41
+ logging.error(f"Preprocessing failed: {str(e)}")
42
+ return img
43
 
44
+ def extract_weight_from_image(pil_img):
45
+ """Extract weight from any digital scale image."""
46
  try:
47
+ # Convert PIL image to OpenCV
48
+ img = np.array(pil_img)
49
+ img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
50
+
51
+ # Preprocess image
52
+ thresh = preprocess_image(img)
53
 
54
  # Try multiple Tesseract configurations
55
  configs = [
 
57
  r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.' # Block of text
58
  ]
59
  for config in configs:
60
+ text = pytesseract.image_to_string(thresh, config=config)
61
  logging.info(f"Tesseract raw output (config {config}): {text}")
62
+
63
+ # Clean and validate text
64
  text = re.sub(r"[^\d\.]", "", text)
65
  if text.count('.') > 1:
66
  text = text.replace('.', '', text.count('.') - 1)
 
68
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
69
  text = text.lstrip('0') or '0'
70
  confidence = 95.0 if len(text.replace('.', '')) >= 3 else 90.0
71
+ try:
72
+ weight = float(text)
73
+ if 0.001 <= weight <= 5000:
74
+ logging.info(f"Detected weight: {text} kg, Confidence: {confidence:.2f}%")
75
+ return text, confidence
76
+ except ValueError:
77
+ logging.warning(f"Invalid weight format: {text}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  logging.info("No valid weight detected.")
80
  return "Not detected", 0.0
81
  except Exception as e: