Sanjayraju30 committed on
Commit
e58b1c2
·
verified ·
1 Parent(s): 1d1e3da

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +59 -77
ocr_engine.py CHANGED
@@ -1,22 +1,15 @@
1
- import easyocr
2
  import numpy as np
3
  import cv2
4
  import re
5
  import logging
6
  from datetime import datetime
7
  import os
 
8
 
9
  # Set up logging
10
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
11
 
12
- # Initialize EasyOCR
13
- try:
14
- easyocr_reader = easyocr.Reader(['en'], gpu=False)
15
- logging.info("EasyOCR initialized successfully")
16
- except Exception as e:
17
- logging.error(f"Failed to initialize EasyOCR: {str(e)}")
18
- easyocr_reader = None
19
-
20
  # Directory for debug images
21
  DEBUG_DIR = "debug_images"
22
  os.makedirs(DEBUG_DIR, exist_ok=True)
@@ -25,7 +18,9 @@ def save_debug_image(img, filename_suffix, prefix=""):
25
  """Save image to debug directory with timestamp."""
26
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
27
  filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
28
- if len(img.shape) == 3:
 
 
29
  cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
30
  else:
31
  cv2.imwrite(filename, img)
@@ -40,19 +35,19 @@ def preprocess_image(img):
40
  """Preprocess image for OCR with enhanced contrast and noise reduction."""
41
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
42
  brightness = estimate_brightness(img)
43
- # Dynamic CLAHE based on brightness
44
- clahe_clip = 4.0 if brightness < 80 else 2.0
45
  clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
46
  enhanced = clahe.apply(gray)
47
  save_debug_image(enhanced, "01_preprocess_clahe")
48
- # Gaussian blur to reduce noise
49
  blurred = cv2.GaussianBlur(enhanced, (3, 3), 0)
50
  save_debug_image(blurred, "02_preprocess_blur")
51
- # Adaptive thresholding with dynamic block size
52
  block_size = max(11, min(31, int(img.shape[0] / 15) * 2 + 1))
53
  thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
54
  cv2.THRESH_BINARY_INV, block_size, 5)
55
- # Morphological operations to enhance digits
56
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
57
  thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
58
  thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
@@ -86,8 +81,7 @@ def detect_roi(img):
86
  save_debug_image(img, "04_original")
87
  thresh, enhanced = preprocess_image(img)
88
  brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
89
- # Try multiple block sizes for robust ROI detection
90
- block_sizes = [max(11, min(31, int(img.shape[0] / s) * 2 + 1)) for s in [15, 20, 25]]
91
  valid_contours = []
92
  img_area = img.shape[0] * img.shape[1]
93
 
@@ -104,15 +98,15 @@ def detect_roi(img):
104
  x, y, w, h = cv2.boundingRect(c)
105
  roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
106
  aspect_ratio = w / h
107
- if (300 < area < (img_area * 0.7) and
108
- 0.5 <= aspect_ratio <= 10.0 and w > 60 and h > 25 and roi_brightness > 40):
109
  valid_contours.append((c, area * roi_brightness))
110
  logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
111
 
112
  if valid_contours:
113
  contour, _ = max(valid_contours, key=lambda x: x[1])
114
  x, y, w, h = cv2.boundingRect(contour)
115
- padding = max(20, min(60, int(min(w, h) * 0.3)))
116
  x, y = max(0, x - padding), max(0, y - padding)
117
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
118
  roi_img = img[y:y+h, x:x+w]
@@ -132,12 +126,11 @@ def detect_segments(digit_img, brightness):
132
  """Detect seven-segment digits with adaptive thresholds."""
133
  try:
134
  h, w = digit_img.shape
135
- if h < 10 or w < 5:
136
  logging.debug("Digit image too small for segment detection.")
137
  return None
138
 
139
- # Dynamic segment threshold based on brightness
140
- segment_threshold = 0.2 if brightness < 80 else 0.3
141
  segments = {
142
  'top': (int(w*0.1), int(w*0.9), 0, int(h*0.25)),
143
  'middle': (int(w*0.1), int(w*0.9), int(h*0.45), int(h*0.55)),
@@ -178,9 +171,9 @@ def detect_segments(digit_img, brightness):
178
  for digit, pattern in digit_patterns.items():
179
  matches = sum(1 for segment in pattern if segment_presence.get(segment, False))
180
  non_matches = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
181
- score = matches - 0.2 * non_matches
182
- if matches >= len(pattern) * 0.6:
183
- score += 1.0
184
  if score > best_score:
185
  best_score = score
186
  best_match = digit
@@ -191,74 +184,63 @@ def detect_segments(digit_img, brightness):
191
  return None
192
 
193
  def perform_ocr(img, roi_bbox):
194
- """Perform OCR with EasyOCR and seven-segment fallback."""
195
- if easyocr_reader is None:
196
- logging.error("EasyOCR not initialized, cannot perform OCR.")
197
- return None, 0.0
198
  try:
199
  thresh, enhanced = preprocess_image(img)
200
  brightness = estimate_brightness(img)
201
- # Dynamic EasyOCR parameters
202
- results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
203
- contrast_ths=0.1, adjust_contrast=1.5,
204
- text_threshold=0.3, mag_ratio=3.0,
205
- allowlist='0123456789.', batch_size=1, y_ths=0.2)
206
- save_debug_image(thresh, "07_ocr_threshold")
207
- logging.info(f"EasyOCR results: {results}")
208
-
209
- if not results:
210
- logging.info("EasyOCR failed, trying fallback parameters.")
211
- results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
212
- contrast_ths=0.05, adjust_contrast=2.0,
213
- text_threshold=0.2, mag_ratio=4.0,
214
- allowlist='0123456789.', batch_size=1, y_ths=0.2)
215
- save_debug_image(thresh, "07_fallback_threshold")
 
 
 
216
 
 
 
 
217
  digits_info = []
218
- for (bbox, text, conf) in results:
219
- (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
220
- h_bbox = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
221
- if (text.isdigit() or text == '.') and h_bbox > 10 and conf > 0.2:
222
- x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
223
- y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
224
- digits_info.append((x_min, x_max, y_min, y_max, text, conf))
225
-
226
  if digits_info:
227
  digits_info.sort(key=lambda x: x[0])
228
  recognized_text = ""
229
- total_conf = 0.0
230
- conf_count = 0
231
- for idx, (x_min, x_max, y_min, y_max, char, conf) in enumerate(digits_info):
232
  x_min, y_min = max(0, x_min), max(0, y_min)
233
  x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
234
  if x_max <= x_min or y_max <= y_min:
235
  continue
236
- if conf < 0.7 and char != '.':
237
- digit_crop = thresh[y_min:y_max, x_min:x_max]
238
- save_debug_image(digit_crop, f"08_digit_crop_{idx}_{char}")
239
- segment_digit = detect_segments(digit_crop, brightness)
240
- if segment_digit:
241
- recognized_text += segment_digit
242
- total_conf += 0.85
243
- logging.debug(f"Used segment detection for char {char}: {segment_digit}")
244
- else:
245
- recognized_text += char
246
- total_conf += conf
247
- conf_count += 1
248
- else:
249
- recognized_text += char
250
- total_conf += conf
251
- conf_count += 1
252
-
253
- avg_conf = total_conf / conf_count if conf_count > 0 else 0.0
254
  text = re.sub(r"[^\d\.]", "", recognized_text)
255
  if text.count('.') > 1:
256
  text = text.replace('.', '', text.count('.') - 1)
257
  text = text.strip('.')
258
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
259
  text = text.lstrip('0') or '0'
260
- logging.info(f"Validated text: {text}, Confidence: {avg_conf:.2f}")
261
- return text, avg_conf * 100
 
 
262
  logging.info("No valid digits detected.")
263
  return None, 0.0
264
  except Exception as e:
@@ -273,7 +255,7 @@ def extract_weight_from_image(pil_img):
273
  save_debug_image(img, "00_input_image")
274
  img = correct_rotation(img)
275
  brightness = estimate_brightness(img)
276
- conf_threshold = 0.7 if brightness > 100 else 0.5
277
 
278
  roi_img, roi_bbox = detect_roi(img)
279
  if roi_bbox:
 
1
+ import pytesseract
2
  import numpy as np
3
  import cv2
4
  import re
5
  import logging
6
  from datetime import datetime
7
  import os
8
+ from PIL import Image
9
 
10
  # Set up logging
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
12
 
 
 
 
 
 
 
 
 
13
  # Directory for debug images
14
  DEBUG_DIR = "debug_images"
15
  os.makedirs(DEBUG_DIR, exist_ok=True)
 
18
  """Save image to debug directory with timestamp."""
19
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
20
  filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
21
+ if isinstance(img, Image.Image):
22
+ img.save(filename)
23
+ elif len(img.shape) == 3:
24
  cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
25
  else:
26
  cv2.imwrite(filename, img)
 
35
  """Preprocess image for OCR with enhanced contrast and noise reduction."""
36
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
37
  brightness = estimate_brightness(img)
38
+ # Dynamic CLAHE
39
+ clahe_clip = 5.0 if brightness < 80 else 3.0
40
  clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
41
  enhanced = clahe.apply(gray)
42
  save_debug_image(enhanced, "01_preprocess_clahe")
43
+ # Gaussian blur
44
  blurred = cv2.GaussianBlur(enhanced, (3, 3), 0)
45
  save_debug_image(blurred, "02_preprocess_blur")
46
+ # Dynamic thresholding
47
  block_size = max(11, min(31, int(img.shape[0] / 15) * 2 + 1))
48
  thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
49
  cv2.THRESH_BINARY_INV, block_size, 5)
50
+ # Morphological operations
51
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
52
  thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
53
  thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
 
81
  save_debug_image(img, "04_original")
82
  thresh, enhanced = preprocess_image(img)
83
  brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
84
+ block_sizes = [max(11, min(31, int(img.shape[0] / s) * 2 + 1)) for s in [12, 15, 18]]
 
85
  valid_contours = []
86
  img_area = img.shape[0] * img.shape[1]
87
 
 
98
  x, y, w, h = cv2.boundingRect(c)
99
  roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
100
  aspect_ratio = w / h
101
+ if (400 < area < (img_area * 0.6) and
102
+ 0.5 <= aspect_ratio <= 8.0 and w > 70 and h > 30 and roi_brightness > 50):
103
  valid_contours.append((c, area * roi_brightness))
104
  logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
105
 
106
  if valid_contours:
107
  contour, _ = max(valid_contours, key=lambda x: x[1])
108
  x, y, w, h = cv2.boundingRect(contour)
109
+ padding = max(20, min(60, int(min(w, h) * 0.4)))
110
  x, y = max(0, x - padding), max(0, y - padding)
111
  w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
112
  roi_img = img[y:y+h, x:x+w]
 
126
  """Detect seven-segment digits with adaptive thresholds."""
127
  try:
128
  h, w = digit_img.shape
129
+ if h < 15 or w < 8:
130
  logging.debug("Digit image too small for segment detection.")
131
  return None
132
 
133
+ segment_threshold = 0.25 if brightness < 80 else 0.35
 
134
  segments = {
135
  'top': (int(w*0.1), int(w*0.9), 0, int(h*0.25)),
136
  'middle': (int(w*0.1), int(w*0.9), int(h*0.45), int(h*0.55)),
 
171
  for digit, pattern in digit_patterns.items():
172
  matches = sum(1 for segment in pattern if segment_presence.get(segment, False))
173
  non_matches = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
174
+ score = matches - 0.15 * non_matches
175
+ if matches >= len(pattern) * 0.65:
176
+ score += 1.2
177
  if score > best_score:
178
  best_score = score
179
  best_match = digit
 
184
  return None
185
 
186
  def perform_ocr(img, roi_bbox):
187
+ """Perform OCR with Tesseract and seven-segment fallback."""
 
 
 
188
  try:
189
  thresh, enhanced = preprocess_image(img)
190
  brightness = estimate_brightness(img)
191
+ pil_img = Image.fromarray(enhanced)
192
+ save_debug_image(pil_img, "07_ocr_input")
193
+
194
+ # Tesseract OCR with numeric config
195
+ custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
196
+ text = pytesseract.image_to_string(pil_img, config=custom_config)
197
+ logging.info(f"Tesseract raw output: {text}")
198
+
199
+ # Clean and validate text
200
+ text = re.sub(r"[^\d\.]", "", text)
201
+ if text.count('.') > 1:
202
+ text = text.replace('.', '', text.count('.') - 1)
203
+ text = text.strip('.')
204
+ if text and re.fullmatch(r"^\d*\.?\d*$", text):
205
+ text = text.lstrip('0') or '0'
206
+ confidence = 95.0 if len(text.replace('.', '')) >= 2 else 90.0
207
+ logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
208
+ return text, confidence
209
 
210
+ # Fallback to seven-segment detection
211
+ logging.info("Tesseract failed, using seven-segment detection.")
212
+ contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
213
  digits_info = []
214
+ for c in contours:
215
+ x, y, w, h = cv2.boundingRect(c)
216
+ if w > 10 and h > 15 and 0.2 <= w/h <= 1.5:
217
+ digits_info.append((x, x+w, y, y+h))
218
+
 
 
 
219
  if digits_info:
220
  digits_info.sort(key=lambda x: x[0])
221
  recognized_text = ""
222
+ for idx, (x_min, x_max, y_min, y_max) in enumerate(digits_info):
 
 
223
  x_min, y_min = max(0, x_min), max(0, y_min)
224
  x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
225
  if x_max <= x_min or y_max <= y_min:
226
  continue
227
+ digit_crop = thresh[y_min:y_max, x_min:x_max]
228
+ save_debug_image(digit_crop, f"08_digit_crop_{idx}")
229
+ segment_digit = detect_segments(digit_crop, brightness)
230
+ if segment_digit:
231
+ recognized_text += segment_digit
232
+ elif idx < len(digits_info) - 1 and (digits_info[idx+1][0] - x_max) < 10:
233
+ recognized_text += '.' # Assume decimal point for close digits
 
 
 
 
 
 
 
 
 
 
 
234
  text = re.sub(r"[^\d\.]", "", recognized_text)
235
  if text.count('.') > 1:
236
  text = text.replace('.', '', text.count('.') - 1)
237
  text = text.strip('.')
238
  if text and re.fullmatch(r"^\d*\.?\d*$", text):
239
  text = text.lstrip('0') or '0'
240
+ confidence = 90.0
241
+ logging.info(f"Validated segment text: {text}, Confidence: {confidence:.2f}%")
242
+ return text, confidence
243
+
244
  logging.info("No valid digits detected.")
245
  return None, 0.0
246
  except Exception as e:
 
255
  save_debug_image(img, "00_input_image")
256
  img = correct_rotation(img)
257
  brightness = estimate_brightness(img)
258
+ conf_threshold = 0.8 if brightness > 100 else 0.6
259
 
260
  roi_img, roi_bbox = detect_roi(img)
261
  if roi_bbox: