Sanjayraju30 committed on
Commit
2b694be
·
verified ·
1 Parent(s): 199a126

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +105 -46
ocr_engine.py CHANGED
@@ -12,36 +12,78 @@ easyocr_reader = easyocr.Reader(['en'], gpu=False)
12
 
13
  def estimate_blur(img):
14
  """Estimate image blur using Laplacian variance"""
15
- gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
16
- return cv2.Laplacian(gray, cv2.CV_64F).var()
 
 
 
 
17
 
18
- def enhance_image(img):
 
19
  try:
20
- # Convert to grayscale
21
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- # Bilateral filter for noise reduction while preserving edges
24
- denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- # CLAHE for contrast enhancement
27
- clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
28
  contrast = clahe.apply(denoised)
29
 
30
- # Adaptive thresholding for uneven lighting
31
  thresh = cv2.adaptiveThreshold(contrast, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
32
  cv2.THRESH_BINARY, 11, 2)
33
 
34
- # Morphological operations to enhance text
35
  kernel = np.ones((3, 3), np.uint8)
36
  morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
37
 
38
- # Sharpen image
39
- sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
 
 
40
  sharpened = cv2.filter2D(morphed, -1, sharpen_kernel)
41
 
42
  # Dynamic resizing
43
  h, w = sharpened.shape
44
- target_size = 800 # Target max dimension for OCR
45
  scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
46
  if scale_factor != 1.0:
47
  sharpened = cv2.resize(sharpened, None, fx=scale_factor, fy=scale_factor,
@@ -49,49 +91,66 @@ def enhance_image(img):
49
 
50
  return sharpened
51
  except Exception as e:
52
- logging.error(f"Image enhancement failed: {str(e)}")
53
- return img # Return original image as fallback
54
 
55
  def extract_weight_from_image(pil_img):
56
  try:
57
  img = np.array(pil_img)
58
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
59
 
60
- # Estimate blur to adjust confidence threshold
61
  blur_score = estimate_blur(img)
62
- conf_threshold = 0.3 if blur_score < 100 else 0.5 # Lower threshold for blurry images
63
 
64
- # Preprocess image
65
- processed = enhance_image(img)
 
 
 
 
 
 
 
 
66
 
67
- # Initialize results
68
  best_weight = None
69
  best_conf = 0.0
70
-
71
- # EasyOCR detection
72
- results = easyocr_reader.readtext(processed, detail=1, paragraph=False)
73
- if not results: # Fallback to original image if no results
74
- results = easyocr_reader.readtext(img, detail=1, paragraph=False)
75
-
76
- for (bbox, text, conf) in results:
77
- original_text = text
78
- text = text.lower().strip()
79
-
80
- # Fix common OCR errors
81
- text = text.replace(",", ".").replace(";", ".")
82
- text = text.replace("o", "0").replace("O", "0")
83
- text = text.replace("s", "5").replace("S", "5")
84
- text = text.replace("g", "9").replace("G", "6")
85
- text = text.replace("l", "1").replace("I", "1")
86
- text = text.replace("b", "8").replace("B", "8")
87
- text = text.replace("kgs", "").replace("kg", "").replace("k", "")
88
- text = re.sub(r"[^\d\.]", "", text)
89
-
90
- # Regex for weight (0.0 to 9999.999)
91
- if re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
92
- if conf > best_conf and conf > conf_threshold:
93
- best_weight = text
94
- best_conf = conf
 
 
 
 
 
 
 
 
 
 
95
 
96
  if not best_weight:
97
  logging.info("No valid weight detected")
@@ -103,7 +162,7 @@ def extract_weight_from_image(pil_img):
103
  int_part = int_part.lstrip("0") or "0"
104
  best_weight = f"{int_part}.{dec_part.rstrip('0')}"
105
  else:
106
- best_weight = best_weight.lstrip("0") or "0"
107
 
108
  return best_weight, round(best_conf * 100, 2)
109
 
 
12
 
13
  def estimate_blur(img):
14
  """Estimate image blur using Laplacian variance"""
15
+ try:
16
+ gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
17
+ return cv2.Laplacian(gray, cv2.CV_64F).var()
18
+ except Exception as e:
19
+ logging.error(f"Blur estimation failed: {str(e)}")
20
+ return 100 # Default value for fallback
21
 
22
+ def detect_roi(img):
23
+ """Detect and crop the region of interest (likely the digital display)"""
24
  try:
 
25
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
26
+ # Adaptive thresholding to handle varying lighting
27
+ thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
28
+ cv2.THRESH_BINARY_INV, 11, 2)
29
+ # Dilate to connect text regions
30
+ kernel = np.ones((5, 5), np.uint8)
31
+ dilated = cv2.dilate(thresh, kernel, iterations=1)
32
+ # Find contours
33
+ contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
34
+ if contours:
35
+ # Get the largest contour with reasonable size
36
+ valid_contours = [c for c in contours if cv2.contourArea(c) > 1000]
37
+ if valid_contours:
38
+ largest_contour = max(valid_contours, key=cv2.contourArea)
39
+ x, y, w, h = cv2.boundingRect(largest_contour)
40
+ # Add padding and ensure bounds
41
+ x, y = max(0, x-20), max(0, y-20)
42
+ w, h = min(w+40, img.shape[1]-x), min(h+40, img.shape[0]-y)
43
+ if w > 50 and h > 30: # Minimum size for valid ROI
44
+ return img[y:y+h, x:x+w]
45
+ return img # Fallback to original image
46
+ except Exception as e:
47
+ logging.error(f"ROI detection failed: {str(e)}")
48
+ return img
49
 
50
+ def enhance_image(img, mode="standard"):
51
+ """Enhance image with different modes for multi-scale processing"""
52
+ try:
53
+ gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
54
+
55
+ if mode == "high_contrast":
56
+ # Stronger denoising and contrast for blurry images
57
+ denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
58
+ clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
59
+ elif mode == "low_noise":
60
+ # Gentle denoising for clear but noisy images
61
+ denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
62
+ clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
63
+ else:
64
+ # Standard preprocessing
65
+ denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
66
+ clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
67
 
 
 
68
  contrast = clahe.apply(denoised)
69
 
70
+ # Adaptive thresholding
71
  thresh = cv2.adaptiveThreshold(contrast, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
72
  cv2.THRESH_BINARY, 11, 2)
73
 
74
+ # Morphological operations
75
  kernel = np.ones((3, 3), np.uint8)
76
  morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
77
 
78
+ # Adaptive sharpening
79
+ blur_score = estimate_blur(img)
80
+ sharpen_strength = 5 if blur_score < 100 else 3
81
+ sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
82
  sharpened = cv2.filter2D(morphed, -1, sharpen_kernel)
83
 
84
  # Dynamic resizing
85
  h, w = sharpened.shape
86
+ target_size = 800
87
  scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
88
  if scale_factor != 1.0:
89
  sharpened = cv2.resize(sharpened, None, fx=scale_factor, fy=scale_factor,
 
91
 
92
  return sharpened
93
  except Exception as e:
94
+ logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
95
+ return img
96
 
97
  def extract_weight_from_image(pil_img):
98
  try:
99
  img = np.array(pil_img)
100
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
101
 
102
+ # Estimate blur for adaptive thresholding
103
  blur_score = estimate_blur(img)
104
+ conf_threshold = 0.35 if blur_score < 100 else 0.55 # Slightly stricter thresholds
105
 
106
+ # Detect ROI
107
+ roi_img = detect_roi(img)
108
+
109
+ # Process multiple image versions
110
+ images_to_process = [
111
+ ("standard", enhance_image(roi_img, mode="standard"), {}),
112
+ ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {}),
113
+ ("low_noise", enhance_image(roi_img, mode="low_noise"), {}),
114
+ ("original", roi_img, {'allowlist': '0123456789.'}) # Restrict to digits and decimal
115
+ ]
116
 
 
117
  best_weight = None
118
  best_conf = 0.0
119
+ best_score = 0.0
120
+
121
+ for mode, proc_img, ocr_params in images_to_process:
122
+ # EasyOCR detection
123
+ results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
124
+
125
+ for (bbox, text, conf) in results:
126
+ original_text = text
127
+ text = text.lower().strip()
128
+
129
+ # Fix common OCR errors
130
+ text = text.replace(",", ".").replace(";", ".")
131
+ text = text.replace("o", "0").replace("O", "0")
132
+ text = text.replace("s", "5").replace("S", "5")
133
+ text = text.replace("g", "9").replace("G", "6")
134
+ text = text.replace("l", "1").replace("I", "1")
135
+ text = text.replace("b", "8").replace("B", "8")
136
+ text = text.replace("z", "2").replace("Z", "2")
137
+ text = text.replace("q", "9").replace("Q", "9")
138
+ text = text.replace("kgs", "").replace("kg", "").replace("k", "")
139
+ text = re.sub(r"[^\d\.]", "", text)
140
+
141
+ # Regex for weight (0.0 to 9999.999)
142
+ if re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
143
+ try:
144
+ weight = float(text)
145
+ # Score based on realistic weight range (0.1–500 kg)
146
+ range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
147
+ score = conf * range_score
148
+ if score > best_score and conf > conf_threshold:
149
+ best_weight = text
150
+ best_conf = conf
151
+ best_score = score
152
+ except ValueError:
153
+ continue
154
 
155
  if not best_weight:
156
  logging.info("No valid weight detected")
 
162
  int_part = int_part.lstrip("0") or "0"
163
  best_weight = f"{int_part}.{dec_part.rstrip('0')}"
164
  else:
165
+ best_weight = best_weight.lstrip('0') or "0"
166
 
167
  return best_weight, round(best_conf * 100, 2)
168