Sanjayraju30 committed
Commit 301eb4d · verified · 1 Parent(s): dc1f7da

Update ocr_engine.py

Files changed (1)
  1. ocr_engine.py +40 -262
ocr_engine.py CHANGED
@@ -3,286 +3,64 @@ import numpy as np
  import cv2
  import re
  import logging
- from datetime import datetime
- import os
  from PIL import Image

  # Set up logging
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

- # Directory for debug images
- DEBUG_DIR = "debug_images"
- os.makedirs(DEBUG_DIR, exist_ok=True)
-
- def save_debug_image(img, filename_suffix, prefix=""):
-     """Save image to debug directory with timestamp."""
-     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
-     filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
-     if isinstance(img, Image.Image):
-         img.save(filename)
-     elif len(img.shape) == 3:
-         cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
-     else:
-         cv2.imwrite(filename, img)
-     logging.info(f"Saved debug image: {filename}")
-
- def estimate_brightness(img):
-     """Estimate image brightness."""
+ def preprocess_for_ocr(img):
+     """Apply grayscale, blur, and threshold to prepare image for OCR."""
      gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-     return np.mean(gray)
+     blurred = cv2.GaussianBlur(gray, (5, 5), 0)

- def preprocess_image(img):
-     """Preprocess image with enhanced contrast and adaptive thresholding."""
-     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-     brightness = estimate_brightness(img)
-
-     # Apply CLAHE with dynamic clip limit
-     clahe_clip = 10.0 if brightness < 80 else 5.0
-     clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
-     enhanced = clahe.apply(gray)
-     save_debug_image(enhanced, "01_preprocess_clahe")
-
-     # Stronger blur to reduce noise
-     blurred = cv2.GaussianBlur(enhanced, (7, 7), 1.0)
-     save_debug_image(blurred, "02_preprocess_blur")
-
-     # Adaptive thresholding with larger block size
-     block_size = max(11, min(41, int(img.shape[0] / 15) * 2 + 1))
+     # Adaptive threshold
      thresh = cv2.adaptiveThreshold(
-         blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-         cv2.THRESH_BINARY_INV, block_size, 5
+         blurred, 255,
+         cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+         cv2.THRESH_BINARY,
+         11, 2
      )

-     # Morphological operations for better digit separation
-     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-     thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
-     save_debug_image(thresh, "03_preprocess_morph")
-     return thresh, enhanced
-
- def correct_rotation(img):
-     """Correct image rotation using edge detection."""
-     try:
-         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-         edges = cv2.Canny(gray, 50, 150, apertureSize=3)
-         lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=20, minLineLength=10, maxLineGap=5)
-         if lines is not None:
-             angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
-             angle = np.median(angles)
-             if abs(angle) > 0.5:
-                 h, w = img.shape[:2]
-                 center = (w // 2, h // 2)
-                 M = cv2.getRotationMatrix2D(center, angle, 1.0)
-                 img = cv2.warpAffine(img, M, (w, h))
-                 save_debug_image(img, "00_rotated_image")
-                 logging.info(f"Applied rotation: {angle:.2f} degrees")
-         return img
-     except Exception as e:
-         logging.error(f"Rotation correction failed: {str(e)}")
-         return img
-
- def detect_roi(img):
-     """Detect region of interest with relaxed contour analysis."""
-     try:
-         save_debug_image(img, "04_original")
-         thresh, enhanced = preprocess_image(img)
-         brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-         block_sizes = [max(11, min(41, int(img.shape[0] / s) * 2 + 1)) for s in [5, 10, 15]]
-         valid_contours = []
-         img_area = img.shape[0] * img.shape[1]
-
-         for block_size in block_sizes:
-             temp_thresh = cv2.adaptiveThreshold(
-                 enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                 cv2.THRESH_BINARY_INV, block_size, 5
-             )
-             kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-             temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
-             save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
-             contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
-             for c in contours:
-                 area = cv2.contourArea(c)
-                 x, y, w, h = cv2.boundingRect(c)
-                 roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
-                 aspect_ratio = w / h
-                 if (30 < area < (img_area * 0.98) and
-                         0.02 <= aspect_ratio <= 25.0 and w > 15 and h > 5 and roi_brightness > 10):
-                     valid_contours.append((c, area * roi_brightness))
-                     logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
-
-         if valid_contours:
-             contour, _ = max(valid_contours, key=lambda x: x[1])
-             x, y, w, h = cv2.boundingRect(contour)
-             padding = max(5, min(25, int(min(w, h) * 0.5)))
-             x, y = max(0, x - padding), max(0, y - padding)
-             w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
-             roi_img = img[y:y+h, x:x+w]
-             save_debug_image(roi_img, "06_detected_roi")
-             logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
-             return roi_img, (x, y, w, h)
-
-         logging.info("No ROI found, using full image.")
-         save_debug_image(img, "06_no_roi_fallback")
-         return img, None
-     except Exception as e:
-         logging.error(f"ROI detection failed: {str(e)}")
-         save_debug_image(img, "06_roi_error_fallback")
-         return img, None
-
- def detect_digit_template(digit_img, brightness):
-     """Digit recognition with adjusted template matching."""
-     try:
-         h, w = digit_img.shape
-         if h < 5 or w < 2:
-             logging.debug("Digit image too small for template matching.")
-             return None
-
-         digit_templates = {
-             '0': [np.array([[1, 1, 1, 1, 1], [1, 0, 0, 0, 1], [1, 0, 0, 0, 1], [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]], dtype=np.float32)],
-             '1': [np.array([[0, 0, 1, 0, 0], [0, 0, 1, 0, 0], [0, 0, 1, 0, 0], [0, 0, 1, 0, 0], [0, 0, 1, 0, 0]], dtype=np.float32)],
-             '2': [np.array([[1, 1, 1, 1, 1], [0, 0, 0, 1, 1], [1, 1, 1, 1, 1], [1, 1, 0, 0, 0], [1, 1, 1, 1, 1]], dtype=np.float32)],
-             '3': [np.array([[1, 1, 1, 1, 1], [0, 0, 0, 1, 1], [1, 1, 1, 1, 1], [0, 0, 0, 1, 1], [1, 1, 1, 1, 1]], dtype=np.float32)],
-             '4': [np.array([[1, 1, 0, 0, 1], [1, 1, 0, 0, 1], [1, 1, 1, 1, 1], [0, 0, 0, 0, 1], [0, 0, 0, 0, 1]], dtype=np.float32)],
-             '5': [np.array([[1, 1, 1, 1, 1], [1, 1, 0, 0, 0], [1, 1, 1, 1, 1], [0, 0, 0, 1, 1], [1, 1, 1, 1, 1]], dtype=np.float32)],
-             '6': [np.array([[1, 1, 1, 1, 1], [1, 1, 0, 0, 0], [1, 1, 1, 1, 1], [1, 0, 0, 1, 1], [1, 1, 1, 1, 1]], dtype=np.float32)],
-             '7': [np.array([[1, 1, 1, 1, 1], [0, 0, 0, 0, 1], [0, 0, 0, 0, 1], [0, 0, 0, 0, 1], [0, 0, 0, 0, 1]], dtype=np.float32)],
-             '8': [np.array([[1, 1, 1, 1, 1], [1, 0, 0, 0, 1], [1, 1, 1, 1, 1], [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]], dtype=np.float32)],
-             '9': [np.array([[1, 1, 1, 1, 1], [1, 0, 0, 0, 1], [1, 1, 1, 1, 1], [0, 0, 0, 1, 1], [1, 1, 1, 1, 1]], dtype=np.float32)],
-             '.': [np.array([[0, 0, 0], [0, 1, 0], [0, 0, 0]], dtype=np.float32)]
-         }
-
-         sizes = [(5, 5), (4, 4), (3, 3)] if h > w else [(3, 3), (2, 2)]
-         best_match, best_score = None, -1
-         for size in sizes:
-             digit_img_resized = cv2.resize(digit_img, size, interpolation=cv2.INTER_AREA)
-             digit_img_resized = (digit_img_resized > 90).astype(np.float32)  # Adjusted binarization threshold
-
-             for digit, templates in digit_templates.items():
-                 for template in templates:
-                     if template.shape[0] != size[0] or template.shape[1] != size[1]:
-                         continue
-                     result = cv2.matchTemplate(digit_img_resized, template, cv2.TM_CCOEFF_NORMED)
-                     _, max_val, _, _ = cv2.minMaxLoc(result)
-                     if max_val > 0.50 and max_val > best_score:  # Lowered threshold
-                         best_score = max_val
-                         best_match = digit
-         logging.debug(f"Template match: {best_match}, Score: {best_score:.2f}")
-         return best_match if best_score > 0.50 else None
-     except Exception as e:
-         logging.error(f"Template digit detection failed: {str(e)}")
-         return None
-
- def perform_ocr(img, roi_bbox):
-     """Perform OCR with enhanced Tesseract and template fallback."""
-     try:
-         thresh, enhanced = preprocess_image(img)
-         brightness = estimate_brightness(img)
-         pil_img = Image.fromarray(enhanced)
-         save_debug_image(pil_img, "07_ocr_input")
-
-         # Enhanced Tesseract configurations
-         configs = [
-             r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.',  # Single line
-             r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.',  # Block of text
-             r'--oem 3 --psm 10 -c tessedit_char_whitelist=0123456789.'  # Single character
-         ]
-         for config in configs:
-             text = pytesseract.image_to_string(pil_img, config=config)
-             logging.info(f"Tesseract raw output (config {config}): {text}")
-             text = re.sub(r"[^\d\.]", "", text)
-             if text.count('.') > 1:
-                 text = text.replace('.', '', text.count('.') - 1)
-             text = text.strip('.')
-             if text and re.fullmatch(r"^\d*\.?\d*$", text):
-                 text = text.lstrip('0') or '0'
-                 confidence = 95.0 if len(text.replace('.', '')) >= 3 else 90.0
-                 logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
-                 return text, confidence
-
-         # Enhanced template-based detection
-         logging.info("Tesseract failed, using template-based detection.")
-         contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-         digits_info = []
-         for c in contours:
-             x, y, w, h = cv2.boundingRect(c)
-             if w > 3 and h > 4 and 0.02 <= w/h <= 5.0:
-                 digits_info.append((x, x+w, y, y+h))
-
-         if digits_info:
-             digits_info.sort(key=lambda x: x[0])
-             recognized_text = ""
-             prev_x_max = -float('inf')
-             for idx, (x_min, x_max, y_min, y_max) in enumerate(digits_info):
-                 x_min, y_min = max(0, x_min), max(0, y_min)
-                 x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
-                 if x_max <= x_min or y_max <= y_min:
-                     continue
-                 digit_crop = thresh[y_min:y_max, x_min:x_max]
-                 save_debug_image(digit_crop, f"08_digit_crop_{idx}")
-                 digit = detect_digit_template(digit_crop, brightness)
-                 if digit:
-                     recognized_text += digit
-                 elif x_min - prev_x_max < 15 and prev_x_max != -float('inf'):
-                     recognized_text += '.'
-                 prev_x_max = x_max
-
-             text = re.sub(r"[^\d\.]", "", recognized_text)
-             if text.count('.') > 1:
-                 text = text.replace('.', '', text.count('.') - 1)
-             text = text.strip('.')
-             if text and re.fullmatch(r"^\d*\.?\d*$", text):
-                 text = text.lstrip('0') or '0'
-                 confidence = 90.0 if len(text.replace('.', '')) >= 3 else 85.0
-                 logging.info(f"Validated template text: {text}, Confidence: {confidence:.2f}%")
-                 return text, confidence
-
-         logging.info("No valid digits detected.")
-         return None, 0.0
-     except Exception as e:
-         logging.error(f"OCR failed: {str(e)}")
-         return None, 0.0
+     # Invert to make text white on black
+     inverted = cv2.bitwise_not(thresh)
+     return inverted

  def extract_weight_from_image(pil_img):
-     """Extract weight from any digital scale image with adjusted thresholds."""
+     """Extract weight reading from an image using pytesseract."""
      try:
+         # Convert PIL to OpenCV
          img = np.array(pil_img)
          img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-         save_debug_image(img, "00_input_image")
-         img = correct_rotation(img)
-         brightness = estimate_brightness(img)
-         conf_threshold = 0.60 if brightness > 70 else 0.40  # Lowered threshold
-
-         # Try ROI-based detection
-         roi_img, roi_bbox = detect_roi(img)
-         if roi_bbox:
-             conf_threshold *= 1.2 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.03) else 1.0
-
-         result, confidence = perform_ocr(roi_img, roi_bbox)
-         if result and confidence >= conf_threshold * 100:
-             try:
-                 weight = float(result)
-                 if 0.001 <= weight <= 5000:
-                     logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
-                     return result, confidence
-                 logging.warning(f"Weight {result} out of range.")
-             except ValueError:
-                 logging.warning(f"Invalid weight format: {result}")
-
-         # Full image fallback with relaxed threshold
-         logging.info("Primary OCR failed, using full image fallback.")
-         result, confidence = perform_ocr(img, None)
-         if result and confidence >= conf_threshold * 0.80 * 100:
-             try:
-                 weight = float(result)
-                 if 0.001 <= weight <= 5000:
-                     logging.info(f"Full image weight: {result} kg, Confidence: {confidence:.2f}%")
-                     return result, confidence
-                 logging.warning(f"Full image weight {result} out of range.")
-             except ValueError:
-                 logging.warning(f"Invalid full image weight format: {result}")
-
-         logging.info("No valid weight detected.")
+         # Preprocess
+         processed_img = preprocess_for_ocr(img)
+
+         # Tesseract config
+         config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
+
+         # Run OCR
+         text = pytesseract.image_to_string(processed_img, config=config)
+
+         # Clean text
+         text = text.strip().replace('\n', '').replace(' ', '')
+         text = re.sub(r"[^\d.]", "", text)
+
+         # Handle multiple dots
+         if text.count('.') > 1:
+             text = text.replace('.', '', text.count('.') - 1)
+
+         if text.startswith('.'):
+             text = '0' + text
+
+         # Validate
+         if text and re.fullmatch(r"\d*\.?\d*", text):
+             value = float(text)
+             if 0.001 <= value <= 5000:
+                 return text, 90.0  # Return with fixed confidence
+             else:
+                 logging.warning(f"Detected weight out of range: {value}")
          return "Not detected", 0.0
+
      except Exception as e:
-         logging.error(f"Weight extraction failed: {str(e)}")
-         return "Not detected", 0.0
+         logging.error(f"OCR error: {str(e)}")
+         return "Not detected", 0.0