Sanjayraju30 committed on
Commit
0f29b7c
·
verified ·
1 Parent(s): 2b694be

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +43 -40
ocr_engine.py CHANGED
@@ -10,39 +10,36 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
10
  # Initialize EasyOCR
11
  easyocr_reader = easyocr.Reader(['en'], gpu=False)
12
 
13
def estimate_blur(img):
    """Estimate image blur via the variance of the Laplacian.

    A low variance indicates a blurry image; callers use the score to pick
    preprocessing strength. Falls back to 100 when the computation fails so
    callers always receive a number.
    """
    try:
        grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        laplacian = cv2.Laplacian(grayscale, cv2.CV_64F)
        return laplacian.var()
    except Exception as e:
        logging.error(f"Blur estimation failed: {str(e)}")
        return 100  # Default value for fallback
 
22
def detect_roi(img):
    """Detect and crop the region of interest (likely the digital display)"""
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Adaptive threshold copes with uneven lighting; inverted so text
        # pixels come out white for the contour search below.
        thresh = cv2.adaptiveThreshold(
            gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV, 11, 2)
        # Dilation merges nearby glyphs into a single connected text region.
        dilated = cv2.dilate(thresh, np.ones((5, 5), np.uint8), iterations=1)
        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        # Keep only contours large enough to plausibly be the display.
        candidates = [c for c in contours if cv2.contourArea(c) > 1000]
        if candidates:
            x, y, w, h = cv2.boundingRect(max(candidates, key=cv2.contourArea))
            # Pad the box by 20px per side, clamped to the image bounds.
            x, y = max(0, x - 20), max(0, y - 20)
            w = min(w + 40, img.shape[1] - x)
            h = min(h + 40, img.shape[0] - y)
            if w > 50 and h > 30:  # reject implausibly small crops
                return img[y:y + h, x:x + w]
        return img  # Fallback to original image
    except Exception as e:
        logging.error(f"ROI detection failed: {str(e)}")
        return img
@@ -52,32 +49,34 @@ def enhance_image(img, mode="standard"):
52
  try:
53
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
54
 
55
- if mode == "high_contrast":
56
- # Stronger denoising and contrast for blurry images
 
 
 
57
  denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
58
  clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
 
59
  elif mode == "low_noise":
60
- # Gentle denoising for clear but noisy images
61
  denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
62
  clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
 
63
  else:
64
- # Standard preprocessing
65
  denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
66
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
 
67
 
68
- contrast = clahe.apply(denoised)
69
-
70
- # Adaptive thresholding
71
- thresh = cv2.adaptiveThreshold(contrast, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
72
- cv2.THRESH_BINARY, 11, 2)
73
 
74
  # Morphological operations
75
  kernel = np.ones((3, 3), np.uint8)
76
  morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
77
 
78
- # Adaptive sharpening
79
- blur_score = estimate_blur(img)
80
- sharpen_strength = 5 if blur_score < 100 else 3
81
  sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
82
  sharpened = cv2.filter2D(morphed, -1, sharpen_kernel)
83
 
@@ -99,19 +98,20 @@ def extract_weight_from_image(pil_img):
99
  img = np.array(pil_img)
100
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
101
 
102
- # Estimate blur for adaptive thresholding
103
- blur_score = estimate_blur(img)
104
- conf_threshold = 0.35 if blur_score < 100 else 0.55 # Slightly stricter thresholds
105
 
106
  # Detect ROI
107
  roi_img = detect_roi(img)
108
 
109
  # Process multiple image versions
110
  images_to_process = [
111
- ("standard", enhance_image(roi_img, mode="standard"), {}),
112
- ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {}),
113
- ("low_noise", enhance_image(roi_img, mode="low_noise"), {}),
114
- ("original", roi_img, {'allowlist': '0123456789.'}) # Restrict to digits and decimal
 
115
  ]
116
 
117
  best_weight = None
@@ -135,6 +135,7 @@ def extract_weight_from_image(pil_img):
135
  text = text.replace("b", "8").replace("B", "8")
136
  text = text.replace("z", "2").replace("Z", "2")
137
  text = text.replace("q", "9").replace("Q", "9")
 
138
  text = text.replace("kgs", "").replace("kg", "").replace("k", "")
139
  text = re.sub(r"[^\d\.]", "", text)
140
 
@@ -144,7 +145,9 @@ def extract_weight_from_image(pil_img):
144
  weight = float(text)
145
  # Score based on realistic weight range (0.1–500 kg)
146
  range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
147
- score = conf * range_score
 
 
148
  if score > best_score and conf > conf_threshold:
149
  best_weight = text
150
  best_conf = conf
 
10
  # Initialize EasyOCR
11
  easyocr_reader = easyocr.Reader(['en'], gpu=False)
12
 
13
def estimate_brightness(img):
    """Estimate image brightness to detect illuminated displays.

    Returns the mean intensity of the grayscale conversion of *img*.
    """
    return np.mean(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
 
 
 
 
17
 
18
def detect_roi(img):
    """Detect and crop the region of interest (likely the digital display)"""
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Bright, illuminated displays need a higher cut-off than dim scenes.
        cutoff = 200 if estimate_brightness(img) > 100 else 150
        _, binary = cv2.threshold(gray, cutoff, 255, cv2.THRESH_BINARY)
        # Heavy dilation joins the separate digits into one blob so the
        # whole display surfaces as a single contour.
        joined = cv2.dilate(binary, np.ones((7, 7), np.uint8), iterations=2)
        contours, _ = cv2.findContours(joined, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        # Keep only contours large enough to plausibly be the display.
        candidates = [c for c in contours if cv2.contourArea(c) > 500]
        if candidates:
            x, y, w, h = cv2.boundingRect(max(candidates, key=cv2.contourArea))
            # Generous 30px padding per side, clamped to the image bounds.
            x, y = max(0, x - 30), max(0, y - 30)
            w = min(w + 60, img.shape[1] - x)
            h = min(h + 60, img.shape[0] - y)
            if w > 50 and h > 30:  # reject implausibly small crops
                return img[y:y + h, x:x + w]
        return img
    except Exception as e:
        logging.error(f"ROI detection failed: {str(e)}")
        return img
 
49
  try:
50
  gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
51
 
52
+ if mode == "seven_segment":
53
+ # Gentle preprocessing for seven-segment displays
54
+ denoised = cv2.GaussianBlur(gray, (5, 5), 0)
55
+ _, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
56
+ elif mode == "high_contrast":
57
  denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
58
  clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
59
+ thresh = clahe.apply(denoised)
60
  elif mode == "low_noise":
 
61
  denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
62
  clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
63
+ thresh = clahe.apply(denoised)
64
  else:
 
65
  denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
66
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
67
+ thresh = clahe.apply(denoised)
68
 
69
+ if mode != "seven_segment":
70
+ thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
71
+ cv2.THRESH_BINARY, 11, 2)
 
 
72
 
73
  # Morphological operations
74
  kernel = np.ones((3, 3), np.uint8)
75
  morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
76
 
77
+ # Reduced sharpening for seven-segment displays
78
+ brightness = estimate_brightness(img)
79
+ sharpen_strength = 3 if mode == "seven_segment" or brightness > 100 else 5
80
  sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
81
  sharpened = cv2.filter2D(morphed, -1, sharpen_kernel)
82
 
 
98
  img = np.array(pil_img)
99
  img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
100
 
101
+ # Estimate brightness for adaptive thresholding
102
+ brightness = estimate_brightness(img)
103
+ conf_threshold = 0.5 if brightness > 100 else 0.4 # Stricter for bright displays
104
 
105
  # Detect ROI
106
  roi_img = detect_roi(img)
107
 
108
  # Process multiple image versions
109
  images_to_process = [
110
+ ("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.3, 'allowlist': '0123456789.'}),
111
+ ("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1}),
112
+ ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1}),
113
+ ("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1}),
114
+ ("original", roi_img, {'contrast_ths': 0.3, 'allowlist': '0123456789.'})
115
  ]
116
 
117
  best_weight = None
 
135
  text = text.replace("b", "8").replace("B", "8")
136
  text = text.replace("z", "2").replace("Z", "2")
137
  text = text.replace("q", "9").replace("Q", "9")
138
+ text = text.replace("6", "2").replace("9", "2") # Specific correction for seven-segment
139
  text = text.replace("kgs", "").replace("kg", "").replace("k", "")
140
  text = re.sub(r"[^\d\.]", "", text)
141
 
 
145
  weight = float(text)
146
  # Score based on realistic weight range (0.1–500 kg)
147
  range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
148
+ # Prefer two-digit weights for scales
149
+ digit_score = 1.1 if 10 <= weight < 100 else 1.0
150
+ score = conf * range_score * digit_score
151
  if score > best_score and conf > conf_threshold:
152
  best_weight = text
153
  best_conf = conf