Sanjayraju30 commited on
Commit
12c2109
·
verified ·
1 Parent(s): 7c31f9a

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +55 -31
ocr_engine.py CHANGED
@@ -21,7 +21,7 @@ def detect_roi(img):
21
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
22
  # Stricter threshold for bright areas
23
  brightness = estimate_brightness(img)
24
- thresh_value = 220 if brightness > 100 else 180
25
  _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
26
  # Morphological operations to connect digits
27
  kernel = np.ones((9, 9), np.uint8)
@@ -29,13 +29,13 @@ def detect_roi(img):
29
  # Find contours
30
  contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
31
  if contours:
32
- # Filter contours by size and aspect ratio (typical for displays)
33
  valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
34
  if valid_contours:
35
  for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
36
  x, y, w, h = cv2.boundingRect(contour)
37
  aspect_ratio = w / h
38
- if 1.5 <= aspect_ratio <= 4.0 and w > 50 and h > 30: # Typical display aspect ratio
39
  x, y = max(0, x-40), max(0, y-40)
40
  w, h = min(w+80, img.shape[1]-x), min(h+80, img.shape[0]-y)
41
  return img[y:y+h, x:x+w]
@@ -44,16 +44,35 @@ def detect_roi(img):
44
  logging.error(f"ROI detection failed: {str(e)}")
45
  return img
46
 
47
- def correct_seven_segment(text, bbox):
48
- """Correct common seven-segment misreads based on bounding box shape"""
49
- if "6" in text:
50
- # Check bounding box aspect ratio to differentiate "6" from "2"
51
  (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
52
- width = abs(x2 - x1)
53
- height = abs(y2 - y1)
54
- aspect_ratio = width / height if height > 0 else 1.0
55
- # "2" typically has a more rectangular shape in seven-segment
56
- if aspect_ratio > 0.5: # Adjust based on typical "2" vs "6" shapes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  text = text.replace("6", "2")
58
  return text
59
 
@@ -63,14 +82,16 @@ def enhance_image(img, mode="standard"):
63
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
64
 
65
  if mode == "seven_segment":
66
- # Minimal preprocessing for seven-segment displays
67
  _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
68
  return thresh
69
  elif mode == "minimal":
70
- # Very light preprocessing
71
- denoised = cv2.GaussianBlur(gray, (3, 3), 0)
72
- _, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
73
  return thresh
 
 
 
74
  elif mode == "high_contrast":
75
  denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
76
  clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
@@ -84,16 +105,19 @@ def enhance_image(img, mode="standard"):
84
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
85
  thresh = clahe.apply(denoised)
86
 
87
- if mode not in ["seven_segment", "minimal"]:
88
  thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
89
  cv2.THRESH_BINARY, 11, 2)
90
 
91
- # Morphological operations
92
- kernel = np.ones((3, 3), np.uint8)
93
- morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
 
 
 
94
 
95
- # Skip sharpening for seven-segment and minimal modes
96
- if mode not in ["seven_segment", "minimal"]:
97
  brightness = estimate_brightness(img)
98
  sharpen_strength = 3 if brightness > 100 else 5
99
  sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
@@ -119,19 +143,19 @@ def extract_weight_from_image(pil_img):
119
 
120
  # Estimate brightness for adaptive thresholding
121
  brightness = estimate_brightness(img)
122
- conf_threshold = 0.7 if brightness > 100 else 0.5 # Stricter for bright displays
123
 
124
  # Detect ROI
125
  roi_img = detect_roi(img)
126
 
127
  # Process multiple image versions
128
  images_to_process = [
129
- ("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'}),
130
- ("minimal", enhance_image(roi_img, mode="minimal"), {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'}),
131
- ("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
132
- ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
133
- ("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
134
- ("original", roi_img, {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'})
135
  ]
136
 
137
  best_weight = None
@@ -144,7 +168,7 @@ def extract_weight_from_image(pil_img):
144
 
145
  for (bbox, text, conf) in results:
146
  # Apply seven-segment correction
147
- text = correct_seven_segment(text, bbox)
148
  original_text = text
149
  text = text.lower().strip()
150
 
@@ -166,7 +190,7 @@ def extract_weight_from_image(pil_img):
166
  weight = float(text)
167
  # Score based on realistic weight range (0.1–500 kg)
168
  range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
169
- # Strongly prefer two-digit weights for scales
170
  digit_score = 1.5 if 10 <= weight < 100 else 1.0
171
  score = conf * range_score * digit_score
172
  if score > best_score and conf > conf_threshold:
 
21
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
22
  # Stricter threshold for bright areas
23
  brightness = estimate_brightness(img)
24
+ thresh_value = 230 if brightness > 100 else 190
25
  _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
26
  # Morphological operations to connect digits
27
  kernel = np.ones((9, 9), np.uint8)
 
29
  # Find contours
30
  contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
31
  if contours:
32
+ # Filter contours by size and aspect ratio
33
  valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
34
  if valid_contours:
35
  for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
36
  x, y, w, h = cv2.boundingRect(contour)
37
  aspect_ratio = w / h
38
+ if 1.5 <= aspect_ratio <= 4.0 and w > 50 and h > 30:
39
  x, y = max(0, x-40), max(0, y-40)
40
  w, h = min(w+80, img.shape[1]-x), min(h+80, img.shape[0]-y)
41
  return img[y:y+h, x:x+w]
 
44
  logging.error(f"ROI detection failed: {str(e)}")
45
  return img
46
 
47
def correct_seven_segment(text, bbox, img):
    """Correct the common seven-segment "2" <-> "6" OCR confusion.

    Crops the detection's bounding box out of *img*, Otsu-thresholds it,
    and compares lit-pixel mass in the upper vs. lower half of the crop:
    a seven-segment "6" has a closed loop at the bottom and is markedly
    bottom-heavy, while "2" is roughly balanced.  NOTE(review): the whole
    box is analysed as one region, so a multi-digit read containing both
    "2" and "6" is corrected wholesale in one direction — acceptable for
    single-value scale displays, but verify for longer strings.

    Args:
        text: OCR text for this detection.
        bbox: Four (x, y) corner points — assumed ordered top-left,
            top-right, bottom-right, bottom-left (EasyOCR convention;
            confirm against the reader in use).
        img: BGR image (3-channel, as produced upstream) that the bbox
            coordinates refer to.

    Returns:
        The text, with "2"/"6" substituted when the pixel-distribution
        heuristic indicates a misread; otherwise unchanged.
    """
    if "2" not in text and "6" not in text:
        return text

    (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
    x_min, x_max = min(x1, x4), max(x2, x3)
    y_min, y_max = min(y1, y2), max(y3, y4)

    # Clamp the crop to the image bounds.
    x_min, y_min = max(0, int(x_min)), max(0, int(y_min))
    x_max = min(img.shape[1], int(x_max))
    y_max = min(img.shape[0], int(y_max))

    # Bug fix: a degenerate crop (width or height < 2 px) leaves one half
    # empty — upper_pixels would be 0 and the "6" test would trivially
    # fire, forcing 2 -> 6 on garbage geometry.  Leave the text alone.
    if x_max - x_min < 2 or y_max - y_min < 2:
        return text

    digit_area = img[y_min:y_max, x_min:x_max]
    if digit_area.size == 0:
        return text

    # Binarize so "lit segment" pixels are 255 regardless of exposure.
    gray = cv2.cvtColor(digit_area, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    h, _ = thresh.shape
    upper_pixels = np.sum(thresh[:h // 2, :] == 255)
    lower_pixels = np.sum(thresh[h // 2:, :] == 255)

    # "6" is bottom-heavy because of its closed lower loop; the 1.5
    # factor is an empirical margin against noise.
    if lower_pixels > upper_pixels * 1.5:
        return text.replace("2", "6")
    return text.replace("6", "2")
78
 
 
82
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
83
 
84
  if mode == "seven_segment":
85
+ # Extremely minimal preprocessing for seven-segment displays
86
  _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
87
  return thresh
88
  elif mode == "minimal":
89
+ # No blurring, just threshold
90
+ _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
 
91
  return thresh
92
+ elif mode == "raw":
93
+ # No preprocessing, just convert to grayscale
94
+ return gray
95
  elif mode == "high_contrast":
96
  denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
97
  clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
 
105
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
106
  thresh = clahe.apply(denoised)
107
 
108
+ if mode not in ["seven_segment", "minimal", "raw"]:
109
  thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
110
  cv2.THRESH_BINARY, 11, 2)
111
 
112
+ # Morphological operations only for non-seven-segment modes
113
+ if mode not in ["seven_segment", "minimal", "raw"]:
114
+ kernel = np.ones((3, 3), np.uint8)
115
+ morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
116
+ else:
117
+ morphed = thresh
118
 
119
+ # Skip sharpening for seven-segment, minimal, and raw modes
120
+ if mode not in ["seven_segment", "minimal", "raw"]:
121
  brightness = estimate_brightness(img)
122
  sharpen_strength = 3 if brightness > 100 else 5
123
  sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
 
143
 
144
  # Estimate brightness for adaptive thresholding
145
  brightness = estimate_brightness(img)
146
+ conf_threshold = 0.8 if brightness > 100 else 0.6 # Stricter for bright displays
147
 
148
  # Detect ROI
149
  roi_img = detect_roi(img)
150
 
151
  # Process multiple image versions
152
  images_to_process = [
153
+ ("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
154
+ ("minimal", enhance_image(roi_img, mode="minimal"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
155
+ ("raw", enhance_image(roi_img, mode="raw"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
156
+ ("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
157
+ ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
158
+ ("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
159
  ]
160
 
161
  best_weight = None
 
168
 
169
  for (bbox, text, conf) in results:
170
  # Apply seven-segment correction
171
+ text = correct_seven_segment(text, bbox, roi_img)
172
  original_text = text
173
  text = text.lower().strip()
174
 
 
190
  weight = float(text)
191
  # Score based on realistic weight range (0.1–500 kg)
192
  range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
193
+ # Prefer two-digit weights for scales
194
  digit_score = 1.5 if 10 <= weight < 100 else 1.0
195
  score = conf * range_score * digit_score
196
  if score > best_score and conf > conf_threshold: