Sanjayraju30 committed on
Commit
4c95d04
·
verified ·
1 Parent(s): 12c2109

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +119 -116
ocr_engine.py CHANGED
@@ -19,17 +19,13 @@ def detect_roi(img):
19
  """Detect and crop the region of interest (likely the digital display)"""
20
  try:
21
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
22
- # Stricter threshold for bright areas
23
  brightness = estimate_brightness(img)
24
  thresh_value = 230 if brightness > 100 else 190
25
  _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
26
- # Morphological operations to connect digits
27
  kernel = np.ones((9, 9), np.uint8)
28
  dilated = cv2.dilate(thresh, kernel, iterations=3)
29
- # Find contours
30
  contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
31
  if contours:
32
- # Filter contours by size and aspect ratio
33
  valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
34
  if valid_contours:
35
  for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
@@ -38,124 +34,139 @@ def detect_roi(img):
38
  if 1.5 <= aspect_ratio <= 4.0 and w > 50 and h > 30:
39
  x, y = max(0, x-40), max(0, y-40)
40
  w, h = min(w+80, img.shape[1]-x), min(h+80, img.shape[0]-y)
41
- return img[y:y+h, x:x+w]
42
- return img
43
  except Exception as e:
44
  logging.error(f"ROI detection failed: {str(e)}")
45
- return img
46
-
47
- def correct_seven_segment(text, bbox, img):
48
- """Correct common seven-segment misreads based on bounding box and pixel distribution"""
49
- if "2" in text or "6" in text:
50
- # Extract bounding box coordinates
51
- (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
52
- x_min, x_max = min(x1, x4), max(x2, x3)
53
- y_min, y_max = min(y1, y2), max(y3, y4)
54
- # Ensure bounds are within image
55
- x_min, y_min = max(0, int(x_min)), max(0, int(y_min))
56
- x_max, y_max = min(img.shape[1], int(x_max)), min(img.shape[0], int(y_max))
57
- if x_max <= x_min or y_max <= y_min:
58
- return text
59
- # Crop the digit area
60
- digit_area = img[y_min:y_max, x_min:x_max]
61
- if digit_area.size == 0:
62
- return text
63
- # Convert to grayscale and threshold
64
- gray = cv2.cvtColor(digit_area, cv2.COLOR_BGR2GRAY)
65
- _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
66
- # Calculate pixel distribution in upper vs lower half
67
- h, w = thresh.shape
68
- upper_half = thresh[:h//2, :]
69
- lower_half = thresh[h//2:, :]
70
- upper_pixels = np.sum(upper_half == 255)
71
- lower_pixels = np.sum(lower_half == 255)
72
- # "6" has more pixels in the lower half due to the loop, "2" is more balanced
73
- if lower_pixels > upper_pixels * 1.5:
74
- text = text.replace("2", "6")
75
- else:
76
- text = text.replace("6", "2")
77
- return text
78
-
79
- def enhance_image(img, mode="standard"):
80
- """Enhance image with different modes for multi-scale processing"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  try:
82
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
83
 
84
- if mode == "seven_segment":
85
- # Extremely minimal preprocessing for seven-segment displays
86
- _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
87
- return thresh
88
- elif mode == "minimal":
89
- # No blurring, just threshold
90
- _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
91
- return thresh
92
- elif mode == "raw":
93
- # No preprocessing, just convert to grayscale
94
- return gray
95
- elif mode == "high_contrast":
96
- denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
97
- clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
98
- thresh = clahe.apply(denoised)
99
- elif mode == "low_noise":
100
- denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
101
- clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
102
- thresh = clahe.apply(denoised)
103
- else:
104
- denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
105
- clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
106
- thresh = clahe.apply(denoised)
107
-
108
- if mode not in ["seven_segment", "minimal", "raw"]:
109
- thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
110
- cv2.THRESH_BINARY, 11, 2)
111
-
112
- # Morphological operations only for non-seven-segment modes
113
- if mode not in ["seven_segment", "minimal", "raw"]:
114
- kernel = np.ones((3, 3), np.uint8)
115
- morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
116
- else:
117
- morphed = thresh
118
-
119
- # Skip sharpening for seven-segment, minimal, and raw modes
120
- if mode not in ["seven_segment", "minimal", "raw"]:
121
- brightness = estimate_brightness(img)
122
- sharpen_strength = 3 if brightness > 100 else 5
123
- sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
124
- morphed = cv2.filter2D(morphed, -1, sharpen_kernel)
125
-
126
- # Dynamic resizing
127
- h, w = morphed.shape
128
- target_size = 800
129
- scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
130
- if scale_factor != 1.0:
131
- morphed = cv2.resize(morphed, None, fx=scale_factor, fy=scale_factor,
132
- interpolation=cv2.INTER_CUBIC if scale_factor > 1 else cv2.INTER_AREA)
133
-
134
- return morphed
135
  except Exception as e:
136
- logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
137
- return img
138
 
139
  def extract_weight_from_image(pil_img):
140
  try:
141
  img = np.array(pil_img)
142
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
143
 
144
- # Estimate brightness for adaptive thresholding
145
  brightness = estimate_brightness(img)
146
- conf_threshold = 0.8 if brightness > 100 else 0.6 # Stricter for bright displays
147
 
148
  # Detect ROI
149
- roi_img = detect_roi(img)
150
-
151
- # Process multiple image versions
 
 
 
 
 
 
 
 
 
 
 
 
152
  images_to_process = [
153
- ("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
154
- ("minimal", enhance_image(roi_img, mode="minimal"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
155
- ("raw", enhance_image(roi_img, mode="raw"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
156
- ("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
157
- ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
158
- ("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
159
  ]
160
 
161
  best_weight = None
@@ -163,16 +174,12 @@ def extract_weight_from_image(pil_img):
163
  best_score = 0.0
164
 
165
  for mode, proc_img, ocr_params in images_to_process:
166
- # EasyOCR detection
 
167
  results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
168
 
169
  for (bbox, text, conf) in results:
170
- # Apply seven-segment correction
171
- text = correct_seven_segment(text, bbox, roi_img)
172
- original_text = text
173
  text = text.lower().strip()
174
-
175
- # Fix common OCR errors
176
  text = text.replace(",", ".").replace(";", ".")
177
  text = text.replace("o", "0").replace("O", "0")
178
  text = text.replace("s", "5").replace("S", "5")
@@ -184,13 +191,10 @@ def extract_weight_from_image(pil_img):
184
  text = text.replace("kgs", "").replace("kg", "").replace("k", "")
185
  text = re.sub(r"[^\d\.]", "", text)
186
 
187
- # Regex for weight (0.0 to 9999.999)
188
  if re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
189
  try:
190
  weight = float(text)
191
- # Score based on realistic weight range (0.1–500 kg)
192
  range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
193
- # Prefer two-digit weights for scales
194
  digit_score = 1.5 if 10 <= weight < 100 else 1.0
195
  score = conf * range_score * digit_score
196
  if score > best_score and conf > conf_threshold:
@@ -204,7 +208,6 @@ def extract_weight_from_image(pil_img):
204
  logging.info("No valid weight detected")
205
  return "Not detected", 0.0
206
 
207
- # Format output
208
  if "." in best_weight:
209
  int_part, dec_part = best_weight.split(".")
210
  int_part = int_part.lstrip("0") or "0"
 
19
  """Detect and crop the region of interest (likely the digital display)"""
20
  try:
21
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
22
  brightness = estimate_brightness(img)
23
  thresh_value = 230 if brightness > 100 else 190
24
  _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
 
25
  kernel = np.ones((9, 9), np.uint8)
26
  dilated = cv2.dilate(thresh, kernel, iterations=3)
 
27
  contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
28
  if contours:
 
29
  valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
30
  if valid_contours:
31
  for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
 
34
  if 1.5 <= aspect_ratio <= 4.0 and w > 50 and h > 30:
35
  x, y = max(0, x-40), max(0, y-40)
36
  w, h = min(w+80, img.shape[1]-x), min(h+80, img.shape[0]-y)
37
+ return img[y:y+h, x:x+w], (x, y, w, h)
38
+ return img, None
39
  except Exception as e:
40
  logging.error(f"ROI detection failed: {str(e)}")
41
+ return img, None
42
+
43
def detect_segments(digit_img):
    """Classify a binarized digit crop as a seven-segment digit.

    The crop is divided into seven fixed rectangular zones (three horizontal
    bars and four vertical half-edges). A zone counts as lit when more than
    half of its pixels equal 255. The set of lit zones is compared with the
    canonical seven-segment pattern of every digit, and the digit whose
    pattern differs from the lit set in the fewest segments is returned.

    Both kinds of mismatch are counted: a lit zone the pattern does not
    contain, and a pattern segment that is not lit. Counting only the former
    lets every superset pattern tie with the true digit (a bare '1' would be
    reported as '0', and a '9' as '8').

    Args:
        digit_img: 2-D array indexed as [row, column]; lit pixels are
            expected to have the value 255.

    Returns:
        A one-character string '0'..'9' for the closest pattern (ties go to
        the lower digit), or None when the crop is smaller than 10 pixels in
        either dimension, a zone is empty, or no zone is lit.
    """
    h, w = digit_img.shape
    if h < 10 or w < 10:
        return None

    # Zone bounds as (x1, x2, y1, y2): horizontal bars span the full width,
    # vertical edges span half the height along the left/right fifth.
    segments = {
        'top': (0, w, 0, h // 5),
        'middle': (0, w, 2 * h // 5, 3 * h // 5),
        'bottom': (0, w, 4 * h // 5, h),
        'left_top': (0, w // 5, 0, h // 2),
        'left_bottom': (0, w // 5, h // 2, h),
        'right_top': (4 * w // 5, w, 0, h // 2),
        'right_bottom': (4 * w // 5, w, h // 2, h),
    }

    lit = set()
    for name, (x1, x2, y1, y2) in segments.items():
        region = digit_img[y1:y2, x1:x2]
        if region.size == 0:
            return None
        # A segment is present if more than 50% of its zone is white.
        if np.sum(region == 255) > region.size * 0.5:
            lit.add(name)

    # With nothing lit there is no evidence for any digit.
    if not lit:
        return None

    # Canonical seven-segment patterns.
    digit_patterns = {
        '0': ('top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
        '1': ('right_top', 'right_bottom'),
        '2': ('top', 'middle', 'bottom', 'left_bottom', 'right_top'),
        '3': ('top', 'middle', 'bottom', 'right_top', 'right_bottom'),
        '4': ('middle', 'left_top', 'right_top', 'right_bottom'),
        '5': ('top', 'middle', 'bottom', 'left_top', 'right_bottom'),
        '6': ('top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_bottom'),
        '7': ('top', 'right_top', 'right_bottom'),
        '8': ('top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
        '9': ('top', 'middle', 'bottom', 'left_top', 'right_top', 'right_bottom'),
    }

    best_match = None
    best_distance = None
    for digit, pattern in digit_patterns.items():
        # Number of segments on which the lit set and the pattern disagree.
        distance = len(lit.symmetric_difference(pattern))
        if best_distance is None or distance < best_distance:
            best_distance = distance
            best_match = digit

    return best_match
96
+
97
def custom_seven_segment_ocr(img, roi_bbox):
    """Read a seven-segment display by classifying EasyOCR-located boxes.

    The image is converted to grayscale and Otsu-binarized. EasyOCR is run on
    the binary image only to obtain text bounding boxes; its recognized text
    and confidence are discarded. Each box, taken left to right, is cropped
    from the binary image and classified with detect_segments(), and the
    resulting characters are concatenated.

    Args:
        img: image accepted by cv2.cvtColor with COLOR_BGR2GRAY.
        roi_bbox: accepted for the caller's convenience; not used here.

    Returns:
        The recognized string when it matches one to four digits with an
        optional fractional part of up to three digits; otherwise None.
        Any exception is logged and also results in None.
    """
    try:
        grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        binary = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

        # EasyOCR serves purely as a box locator here.
        detections = easyocr_reader.readtext(
            binary,
            detail=1,
            paragraph=False,
            contrast_ths=0.1,
            adjust_contrast=0.7,
            text_threshold=0.9,
            mag_ratio=1.5,
            allowlist='0123456789.',
        )
        if not detections:
            return None

        # Reduce each four-corner box to axis-aligned (left, right, top, bottom).
        boxes = []
        for corners, _text, _conf in detections:
            (x1, y1), (x2, y2), (x3, y3), (x4, y4) = corners
            boxes.append((min(x1, x4), max(x2, x3), min(y1, y2), max(y3, y4)))

        img_height, img_width = binary.shape[0], binary.shape[1]

        glyphs = []
        for left, right, top, bottom in sorted(boxes, key=lambda box: box[0]):
            # Clamp to the image so slicing never wraps or overruns.
            left = max(0, int(left))
            top = max(0, int(top))
            right = min(img_width, int(right))
            bottom = min(img_height, int(bottom))
            if right <= left or bottom <= top:
                continue
            # NOTE(review): detect_segments() yields a digit or None, so a
            # decimal point can never appear in the output — confirm intended.
            glyph = detect_segments(binary[top:bottom, left:right])
            if glyph:
                glyphs.append(glyph)

        # Keep only digits/dots, then require a plausible weight format.
        candidate = re.sub(r"[^\d\.]", "", "".join(glyphs))
        if not re.fullmatch(r"\d{1,4}(\.\d{0,3})?", candidate):
            return None
        return candidate
    except Exception as e:
        logging.error(f"Custom seven-segment OCR failed: {str(e)}")
        return None
143
 
144
  def extract_weight_from_image(pil_img):
145
  try:
146
  img = np.array(pil_img)
147
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
148
 
 
149
  brightness = estimate_brightness(img)
150
+ conf_threshold = 0.9 if brightness > 100 else 0.7
151
 
152
  # Detect ROI
153
+ roi_img, roi_bbox = detect_roi(img)
154
+
155
+ # Try custom seven-segment OCR first
156
+ custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
157
+ if custom_result:
158
+ # Format the custom result
159
+ if "." in custom_result:
160
+ int_part, dec_part = custom_result.split(".")
161
+ int_part = int_part.lstrip("0") or "0"
162
+ custom_result = f"{int_part}.{dec_part.rstrip('0')}"
163
+ else:
164
+ custom_result = custom_result.lstrip('0') or "0"
165
+ return custom_result, 100.0 # High confidence for custom OCR
166
+
167
+ # Fallback to EasyOCR if custom OCR fails
168
  images_to_process = [
169
+ ("raw", roi_img, {'contrast_ths': 0.1, 'adjust_contrast': 0.7, 'text_threshold': 0.9, 'mag_ratio': 1.5, 'allowlist': '0123456789.'}),
 
 
 
 
 
170
  ]
171
 
172
  best_weight = None
 
174
  best_score = 0.0
175
 
176
  for mode, proc_img, ocr_params in images_to_process:
177
+ if mode == "raw":
178
+ proc_img = cv2.cvtColor(proc_img, cv2.COLOR_BGR2GRAY)
179
  results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
180
 
181
  for (bbox, text, conf) in results:
 
 
 
182
  text = text.lower().strip()
 
 
183
  text = text.replace(",", ".").replace(";", ".")
184
  text = text.replace("o", "0").replace("O", "0")
185
  text = text.replace("s", "5").replace("S", "5")
 
191
  text = text.replace("kgs", "").replace("kg", "").replace("k", "")
192
  text = re.sub(r"[^\d\.]", "", text)
193
 
 
194
  if re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
195
  try:
196
  weight = float(text)
 
197
  range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
 
198
  digit_score = 1.5 if 10 <= weight < 100 else 1.0
199
  score = conf * range_score * digit_score
200
  if score > best_score and conf > conf_threshold:
 
208
  logging.info("No valid weight detected")
209
  return "Not detected", 0.0
210
 
 
211
  if "." in best_weight:
212
  int_part, dec_part = best_weight.split(".")
213
  int_part = int_part.lstrip("0") or "0"