Sanjayraju30 committed on
Commit
25aa654
·
verified ·
1 Parent(s): 5699ebb

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +392 -20
ocr_engine.py CHANGED
@@ -3,36 +3,408 @@ import numpy as np
3
  import cv2
4
  import re
5
  import logging
 
 
6
  from PIL import Image
7
 
 
8
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def preprocess_image(img):
    """Binarize a BGR image for OCR.

    The image is converted to grayscale, upsampled by a factor of two,
    lightly blurred, and passed through an inverted Gaussian adaptive
    threshold. The resulting single-channel mask is returned.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    upscaled = cv2.resize(grayscale, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
    smoothed = cv2.GaussianBlur(upscaled, (3, 3), 0)
    return cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
def extract_weight_from_image(pil_img):
    """Read a numeric weight from a PIL image.

    Returns a ``(text, confidence)`` tuple. On success ``text`` is the parsed
    value rendered with ``str(float)`` and the confidence is a fixed 90.0.
    When no number in the range (0, 5000] is found, or any step raises, the
    result is ``("Not detected", 0.0)``.
    """
    not_found = ("Not detected", 0.0)
    try:
        bgr = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        binary = preprocess_image(bgr)

        tess_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.kg'
        raw_text = pytesseract.image_to_string(binary, config=tess_config)
        logging.info(f"OCR Raw Output: {raw_text}")

        # Collapse spaces and newlines so digits split by OCR are rejoined
        compact = raw_text.replace(" ", "").replace("\n", "")
        number = re.search(r"(\d+\.?\d*)", compact)
        if not number:
            return not_found

        value = float(number.group(1))
        if not (0 < value <= 5000):
            return not_found
        return str(value), 90.0
    except Exception as e:
        logging.error(f"OCR error: {e}")
        return not_found
 
 
 
 
3
  import cv2
4
  import re
5
  import logging
6
+ from datetime import datetime
7
+ import os
8
  from PIL import Image
9
 
10
+ # Set up logging
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
 
13
+ # Directory for debug images
14
+ DEBUG_DIR = "debug_images"
15
+ os.makedirs(DEBUG_DIR, exist_ok=True)
16
+
17
def save_debug_image(img, filename_suffix, prefix=""):
    """Save an intermediate image into ``DEBUG_DIR`` under a timestamped name.

    Args:
        img: A PIL ``Image`` or an OpenCV array. Arrays are written unchanged,
            so 3-channel arrays must be in OpenCV's BGR order (which is what
            the callers visible in this file pass).
        filename_suffix: Stage label appended after the timestamp.
        prefix: Optional text placed before the timestamp.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
    if isinstance(img, Image.Image):
        img.save(filename)
    # cv2.imwrite already expects BGR; converting to RGB first would store
    # the file with red and blue swapped.
    elif not cv2.imwrite(filename, img):
        logging.warning(f"Could not write debug image: {filename}")
        return
    logging.info(f"Saved debug image: {filename}")
28
+
29
def estimate_brightness(img):
    """Return the mean grayscale intensity of a BGR image."""
    luminance = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return luminance.mean()
33
+
def preprocess_image(img):
    """Build the binary mask and the contrast-enhanced grayscale for a BGR image.

    Steps: grayscale -> CLAHE (stronger clip limit for dark images) ->
    5x5 Gaussian blur -> inverted adaptive threshold -> 3x3 morphological
    opening. A debug image is saved after the CLAHE, blur and opening stages.

    Returns:
        ``(thresh, enhanced)``: the opened binary mask and the CLAHE output.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Dark frames (mean level below 90) get a more aggressive clip limit
    is_dark = estimate_brightness(img) < 90
    equalizer = cv2.createCLAHE(
        clipLimit=8.0 if is_dark else 4.0,
        tileGridSize=(8, 8),
    )
    enhanced = equalizer.apply(gray)
    save_debug_image(enhanced, "01_preprocess_clahe")

    smoothed = cv2.GaussianBlur(enhanced, (5, 5), 0)
    save_debug_image(smoothed, "02_preprocess_blur")

    # Odd threshold window derived from the image height, clamped to [7, 31]
    window = int(img.shape[0] / 20) * 2 + 1
    window = max(7, min(31, window))
    binary = cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        window,
        3,
    )

    opening_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, opening_kernel, iterations=1)
    save_debug_image(binary, "03_preprocess_morph")
    return binary, enhanced
61
+
62
def _median_skew_degrees(lines):
    """Return the median angle of the near-horizontal segments, or None.

    ``lines`` is the ``cv2.HoughLinesP`` result: one ``[[x1, y1, x2, y2]]``
    entry per segment. Segment direction is arbitrary, so each angle is first
    folded into (-90, 90]. Only segments within 45 degrees of horizontal are
    kept, otherwise vertical edges would drag the median towards 90 degrees.
    """
    angles = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        angle = np.arctan2(y2 - y1, x2 - x1) * 180 / np.pi
        if angle > 90:
            angle -= 180
        elif angle <= -90:
            angle += 180
        if abs(angle) <= 45:
            angles.append(angle)
    if not angles:
        return None
    return float(np.median(angles))


def correct_rotation(img):
    """Deskew a BGR image using its dominant near-horizontal edge direction.

    Edges are found with Canny and turned into line segments with the
    probabilistic Hough transform. If the median skew of the near-horizontal
    segments exceeds 0.3 degrees, the image is rotated about its centre by
    that angle (output keeps the input width and height).

    Returns:
        The rotated image, or the input unchanged when no usable segments
        are found, the skew is negligible, or any step raises.
    """
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 30, 100, apertureSize=3)
        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=25, minLineLength=15, maxLineGap=10)
        if lines is None:
            return img

        angle = _median_skew_degrees(lines)
        if angle is not None and abs(angle) > 0.3:
            h, w = img.shape[:2]
            center = (w // 2, h // 2)
            M = cv2.getRotationMatrix2D(center, angle, 1.0)
            img = cv2.warpAffine(img, M, (w, h))
            save_debug_image(img, "00_rotated_image")
            logging.info(f"Applied rotation: {angle:.2f} degrees")
        return img
    except Exception as e:
        logging.error(f"Rotation correction failed: {str(e)}")
        return img
82
+
83
def detect_roi(img):
    """Crop a BGR image to its most prominent bright contour region.

    The CLAHE-enhanced grayscale from ``preprocess_image`` is thresholded at
    three window sizes. Every external contour that passes the area,
    aspect-ratio, size and brightness filters is scored by
    ``area * mean brightness``; the best-scoring bounding box is padded and
    cropped out of ``img``.

    Returns:
        ``(roi_img, (x, y, w, h))`` for the padded crop, or ``(img, None)``
        when no contour qualifies or any step raises.
    """
    try:
        save_debug_image(img, "04_original")
        _, enhanced = preprocess_image(img)
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img_h, img_w = img.shape[0], img.shape[1]
        img_area = img_h * img_w
        closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

        candidates = []  # (score, bounding box) pairs in discovery order
        for divisor in [5, 10, 20]:
            # Odd threshold window, clamped to [7, 31]
            block_size = max(7, min(31, int(img_h / divisor) * 2 + 1))
            mask = cv2.adaptiveThreshold(
                enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                cv2.THRESH_BINARY_INV, block_size, 3
            )
            mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, closing_kernel, iterations=2)
            save_debug_image(mask, f"05_roi_threshold_block{block_size}")
            contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            for contour in contours:
                area = cv2.contourArea(contour)
                x, y, w, h = cv2.boundingRect(contour)
                roi_brightness = np.mean(gray[y:y+h, x:x+w])
                aspect_ratio = w / h
                if not (50 < area < (img_area * 0.95)):
                    continue
                if not (0.05 <= aspect_ratio <= 20.0):
                    continue
                if not (w > 20 and h > 8 and roi_brightness > 15):
                    continue
                candidates.append((area * roi_brightness, (x, y, w, h)))
                logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")

        if not candidates:
            logging.info("No ROI found, using full image.")
            save_debug_image(img, "06_no_roi_fallback")
            return img, None

        # max() keeps the first of equally scored candidates
        _, (x, y, w, h) = max(candidates, key=lambda candidate: candidate[0])
        padding = max(5, min(20, int(min(w, h) * 0.4)))
        x = max(0, x - padding)
        y = max(0, y - padding)
        # Grow by the padding on both sides without leaving the image
        w = min(w + 2 * padding, img_w - x)
        h = min(h + 2 * padding, img_h - y)
        roi_img = img[y:y+h, x:x+w]
        save_debug_image(roi_img, "06_detected_roi")
        logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
        return roi_img, (x, y, w, h)
    except Exception as e:
        logging.error(f"ROI detection failed: {str(e)}")
        save_debug_image(img, "06_roi_error_fallback")
        return img, None
131
+
132
def _glyph(*rows):
    """Build a float32 0/1 template from row strings such as ``"10001"``."""
    return np.array([[int(ch) for ch in row] for row in rows], dtype=np.float32)


# Coarse seven-segment glyphs: a 5-column and a narrower variant per symbol.
# Built once at import time instead of on every call.
_DIGIT_TEMPLATES = {
    '0': [
        _glyph("11111", "10001", "10001", "10001", "11111"),
        _glyph("1111", "1001", "1001", "1001", "1111"),
    ],
    '1': [
        _glyph("00100", "00100", "00100", "00100", "00100"),
        _glyph("010", "010", "010", "010", "010"),
    ],
    '2': [
        _glyph("11111", "00011", "11111", "11000", "11111"),
        _glyph("1111", "0011", "1111", "1100", "1111"),
    ],
    '3': [
        _glyph("11111", "00011", "11111", "00011", "11111"),
        _glyph("1111", "0011", "1111", "0011", "1111"),
    ],
    '4': [
        _glyph("11001", "11001", "11111", "00001", "00001"),
        _glyph("1001", "1001", "1111", "0001", "0001"),
    ],
    '5': [
        _glyph("11111", "11000", "11111", "00011", "11111"),
        _glyph("1111", "1100", "1111", "0011", "1111"),
    ],
    '6': [
        _glyph("11111", "11000", "11111", "10011", "11111"),
        _glyph("1111", "1100", "1111", "1011", "1111"),
    ],
    '7': [
        _glyph("11111", "00001", "00001", "00001", "00001"),
        _glyph("1111", "0001", "0001", "0001", "0001"),
    ],
    '8': [
        _glyph("11111", "10001", "11111", "10001", "11111"),
        _glyph("1111", "1001", "1111", "1001", "1111"),
    ],
    '9': [
        _glyph("11111", "10001", "11111", "00011", "11111"),
        _glyph("1111", "1001", "1111", "0011", "1111"),
    ],
    '.': [
        _glyph("000", "010", "000"),
        _glyph("00", "10", "00"),
    ],
}


def detect_digit_template(digit_img, brightness):
    """Classify a binary crop as a digit or decimal point by template matching.

    The crop is shrunk to the shape of each template in turn, binarized at
    100, and compared with normalized cross-correlation. The symbol with the
    highest score above 0.55 wins.

    Args:
        digit_img: Single-channel crop (2-D array) of one candidate glyph.
        brightness: Accepted for interface compatibility; not used.

    Returns:
        The matched character (``'0'``-``'9'`` or ``'.'``), or ``None`` when
        the crop is smaller than 5x2, nothing scores above 0.55, or an error
        occurs.
    """
    try:
        h, w = digit_img.shape
        if h < 5 or w < 2:
            logging.debug("Digit image too small for template matching.")
            return None

        best_match, best_score = None, -1
        for symbol, templates in _DIGIT_TEMPLATES.items():
            # Digit templates are only tried on crops taller than wide;
            # squatter crops can only match the decimal point.
            if symbol != '.' and h <= w:
                continue
            for template in templates:
                rows, cols = template.shape
                # cv2.resize takes dsize as (width, height). Resizing to the
                # template's own shape lets the narrow variants be compared;
                # a square-only target never matched their shape.
                resized = cv2.resize(digit_img, (cols, rows), interpolation=cv2.INTER_AREA)
                candidate = (resized > 100).astype(np.float32)  # Binarize
                result = cv2.matchTemplate(candidate, template, cv2.TM_CCOEFF_NORMED)
                _, max_val, _, _ = cv2.minMaxLoc(result)
                if max_val > 0.55 and max_val > best_score:
                    best_score = max_val
                    best_match = symbol
        logging.debug(f"Template match: {best_match}, Score: {best_score:.2f}")
        return best_match
    except Exception as e:
        logging.error(f"Template digit detection failed: {str(e)}")
        return None
297
+
298
def _normalize_numeric_text(raw_text):
    """Reduce OCR output to a plain decimal string, or None if none remains.

    Keeps digits and at most one decimal point (the last one seen), trims
    leading/trailing points, and drops redundant leading zeros while keeping
    the zero in front of a decimal point, so "0.5" stays "0.5" rather than
    becoming ".5".
    """
    text = re.sub(r"[^\d\.]", "", raw_text)
    extra_points = text.count('.') - 1
    if extra_points > 0:
        text = text.replace('.', '', extra_points)
    text = text.strip('.')
    if not text or not re.fullmatch(r"\d*\.?\d*", text):
        return None
    text = text.lstrip('0') or '0'
    if text.startswith('.'):
        text = '0' + text
    return text


def _significant_digit_count(text):
    """Count the digits of a normalized reading, ignoring leading zeros."""
    return len(text.lstrip('0').replace('.', ''))


def perform_ocr(img, roi_bbox):
    """Read a number from a BGR image with Tesseract, then template matching.

    Tesseract is tried on the contrast-enhanced grayscale in single-line and
    block mode; the first output that normalizes to a number is returned. If
    neither does, contours of the binary mask are sorted left to right and
    classified with ``detect_digit_template``. A contour that is not
    recognized but starts within 10 px of the previous one is treated as a
    decimal point.

    Args:
        img: BGR image (full frame or ROI crop).
        roi_bbox: Accepted for interface compatibility; not used.

    Returns:
        ``(text, confidence)`` with a heuristic confidence of 95/90 for
        Tesseract or 90/85 for templates (the higher value when the reading
        has at least three digits), or ``(None, 0.0)`` when nothing numeric
        is found or any step raises.
    """
    try:
        thresh, enhanced = preprocess_image(img)
        brightness = estimate_brightness(img)
        pil_img = Image.fromarray(enhanced)
        save_debug_image(pil_img, "07_ocr_input")

        # Try multiple Tesseract configurations
        configs = [
            r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.',  # Single line
            r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.',  # Block of text
        ]
        for config in configs:
            raw = pytesseract.image_to_string(pil_img, config=config)
            logging.info(f"Tesseract raw output (config {config}): {raw}")
            text = _normalize_numeric_text(raw)
            if text is not None:
                confidence = 95.0 if _significant_digit_count(text) >= 3 else 90.0
                logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
                return text, confidence

        # Fallback to template-based detection
        logging.info("Tesseract failed, using template-based detection.")
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        boxes = []
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            if w > 4 and h > 5 and 0.03 <= w / h <= 4.0:
                boxes.append((x, x + w, y, y + h))
        boxes.sort(key=lambda box: box[0])  # left to right

        recognized = []
        prev_x_max = None
        for idx, (x_min, x_max, y_min, y_max) in enumerate(boxes):
            x_min, y_min = max(0, x_min), max(0, y_min)
            x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
            if x_max <= x_min or y_max <= y_min:
                continue
            digit_crop = thresh[y_min:y_max, x_min:x_max]
            save_debug_image(digit_crop, f"08_digit_crop_{idx}")
            digit = detect_digit_template(digit_crop, brightness)
            if digit:
                recognized.append(digit)
            elif prev_x_max is not None and x_min - prev_x_max < 10:
                # Unrecognized blob hugging the previous glyph: assume a decimal point
                recognized.append('.')
            prev_x_max = x_max

        text = _normalize_numeric_text("".join(recognized))
        if text is not None:
            confidence = 90.0 if _significant_digit_count(text) >= 3 else 85.0
            logging.info(f"Validated template text: {text}, Confidence: {confidence:.2f}%")
            return text, confidence

        logging.info("No valid digits detected.")
        return None, 0.0
    except Exception as e:
        logging.error(f"OCR failed: {str(e)}")
        return None, 0.0
366
 
# (accepted, out-of-range, bad-format) log templates for each OCR pass
_PRIMARY_MESSAGES = (
    "Detected weight: {result} kg, Confidence: {confidence:.2f}%",
    "Weight {result} out of range.",
    "Invalid weight format: {result}",
)
_FALLBACK_MESSAGES = (
    "Full image weight: {result} kg, Confidence: {confidence:.2f}%",
    "Full image weight {result} out of range.",
    "Invalid full image weight format: {result}",
)


def _weight_is_valid(result, confidence, min_confidence, messages):
    """Return True when an OCR reading is confident enough and within 0.001..5000."""
    accepted_msg, range_msg, format_msg = messages
    if not result or confidence < min_confidence:
        return False
    try:
        weight = float(result)
    except ValueError:
        logging.warning(format_msg.format(result=result))
        return False
    if 0.001 <= weight <= 5000:
        logging.info(accepted_msg.format(result=result, confidence=confidence))
        return True
    logging.warning(range_msg.format(result=result))
    return False


def extract_weight_from_image(pil_img):
    """Extract the displayed weight from a photo of a digital scale.

    The image is deskewed, cropped to the detected display region and passed
    to OCR. If that reading is rejected, OCR is retried on the full frame
    with a confidence threshold relaxed to 85% of the primary one.

    Args:
        pil_img: PIL image of the scale. Any PIL mode is accepted; it is
            converted to RGB before processing.

    Returns:
        ``(text, confidence)`` with the reading as a string, or
        ``("Not detected", 0.0)`` when no reading in 0.001..5000 is found or
        any step raises.
    """
    try:
        # Grayscale/palette images would make the RGB->BGR conversion raise
        if isinstance(pil_img, Image.Image):
            pil_img = pil_img.convert("RGB")
        img = np.array(pil_img)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        save_debug_image(img, "00_input_image")
        img = correct_rotation(img)
        brightness = estimate_brightness(img)
        conf_threshold = 0.65 if brightness > 70 else 0.45

        # Try ROI-based detection
        roi_img, roi_bbox = detect_roi(img)
        if roi_bbox:
            # Demand more confidence when the ROI covers over 5% of the frame
            roi_area = roi_bbox[2] * roi_bbox[3]
            if roi_area > (img.shape[0] * img.shape[1] * 0.05):
                conf_threshold *= 1.15

        result, confidence = perform_ocr(roi_img, roi_bbox)
        if _weight_is_valid(result, confidence, conf_threshold * 100, _PRIMARY_MESSAGES):
            return result, confidence

        # Full image fallback
        logging.info("Primary OCR failed, using full image fallback.")
        if roi_bbox:
            result, confidence = perform_ocr(img, None)
        # Without an ROI the first pass already ran on the full image, so its
        # result is re-checked against the relaxed threshold instead of
        # repeating the identical OCR run.
        if _weight_is_valid(result, confidence, conf_threshold * 0.85 * 100, _FALLBACK_MESSAGES):
            return result, confidence

        logging.info("No valid weight detected.")
        return "Not detected", 0.0
    except Exception as e:
        logging.error(f"Weight extraction failed: {str(e)}")
        return "Not detected", 0.0