Spaces:
Running
Running
Update ocr_engine.py
Browse files- ocr_engine.py +55 -34
ocr_engine.py
CHANGED
@@ -19,40 +19,58 @@ def detect_roi(img):
|
|
19 |
"""Detect and crop the region of interest (likely the digital display)"""
|
20 |
try:
|
21 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
22 |
-
#
|
23 |
brightness = estimate_brightness(img)
|
24 |
-
thresh_value =
|
25 |
_, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
|
26 |
-
#
|
27 |
-
kernel = np.ones((
|
28 |
-
dilated = cv2.dilate(thresh, kernel, iterations=
|
29 |
# Find contours
|
30 |
contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
31 |
if contours:
|
32 |
-
#
|
33 |
valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
|
34 |
if valid_contours:
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
return img
|
43 |
except Exception as e:
|
44 |
logging.error(f"ROI detection failed: {str(e)}")
|
45 |
return img
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
def enhance_image(img, mode="standard"):
|
48 |
"""Enhance image with different modes for multi-scale processing"""
|
49 |
try:
|
50 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
51 |
|
52 |
if mode == "seven_segment":
|
53 |
-
#
|
54 |
-
|
|
|
|
|
|
|
|
|
55 |
_, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
|
|
56 |
elif mode == "high_contrast":
|
57 |
denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
|
58 |
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
@@ -66,7 +84,7 @@ def enhance_image(img, mode="standard"):
|
|
66 |
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
67 |
thresh = clahe.apply(denoised)
|
68 |
|
69 |
-
if mode
|
70 |
thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
71 |
cv2.THRESH_BINARY, 11, 2)
|
72 |
|
@@ -74,21 +92,22 @@ def enhance_image(img, mode="standard"):
|
|
74 |
kernel = np.ones((3, 3), np.uint8)
|
75 |
morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
|
76 |
|
77 |
-
#
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
|
|
82 |
|
83 |
# Dynamic resizing
|
84 |
-
h, w =
|
85 |
target_size = 800
|
86 |
scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
|
87 |
if scale_factor != 1.0:
|
88 |
-
|
89 |
-
|
90 |
|
91 |
-
return
|
92 |
except Exception as e:
|
93 |
logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
|
94 |
return img
|
@@ -100,18 +119,19 @@ def extract_weight_from_image(pil_img):
|
|
100 |
|
101 |
# Estimate brightness for adaptive thresholding
|
102 |
brightness = estimate_brightness(img)
|
103 |
-
conf_threshold = 0.
|
104 |
|
105 |
# Detect ROI
|
106 |
roi_img = detect_roi(img)
|
107 |
|
108 |
# Process multiple image versions
|
109 |
images_to_process = [
|
110 |
-
("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.
|
111 |
-
("
|
112 |
-
("
|
113 |
-
("
|
114 |
-
("
|
|
|
115 |
]
|
116 |
|
117 |
best_weight = None
|
@@ -123,6 +143,8 @@ def extract_weight_from_image(pil_img):
|
|
123 |
results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
|
124 |
|
125 |
for (bbox, text, conf) in results:
|
|
|
|
|
126 |
original_text = text
|
127 |
text = text.lower().strip()
|
128 |
|
@@ -135,7 +157,6 @@ def extract_weight_from_image(pil_img):
|
|
135 |
text = text.replace("b", "8").replace("B", "8")
|
136 |
text = text.replace("z", "2").replace("Z", "2")
|
137 |
text = text.replace("q", "9").replace("Q", "9")
|
138 |
-
text = text.replace("6", "2").replace("9", "2") # Specific correction for seven-segment
|
139 |
text = text.replace("kgs", "").replace("kg", "").replace("k", "")
|
140 |
text = re.sub(r"[^\d\.]", "", text)
|
141 |
|
@@ -145,8 +166,8 @@ def extract_weight_from_image(pil_img):
|
|
145 |
weight = float(text)
|
146 |
# Score based on realistic weight range (0.1–500 kg)
|
147 |
range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
|
148 |
-
#
|
149 |
-
digit_score = 1.
|
150 |
score = conf * range_score * digit_score
|
151 |
if score > best_score and conf > conf_threshold:
|
152 |
best_weight = text
|
|
|
def detect_roi(img):
    """Detect and crop the region of interest (likely the digital display).

    Binarizes the frame with a brightness-adaptive threshold, dilates the
    result so digit strokes merge into one blob, then scans the largest
    bright blobs for one with a display-like aspect ratio. Returns a padded
    crop of the first match, or the original image when nothing qualifies
    (or on any OpenCV error).
    """
    try:
        grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Brighter scenes get a stricter binarization cutoff.
        level = 220 if estimate_brightness(img) > 100 else 180
        _, binary = cv2.threshold(grayscale, level, 255, cv2.THRESH_BINARY)
        # Dilate aggressively so the separate digits fuse into a single blob.
        blob = cv2.dilate(binary, np.ones((9, 9), np.uint8), iterations=3)
        found, _ = cv2.findContours(blob, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # Drop speckle; only contours with a meaningful area are candidates.
        candidates = [c for c in found if cv2.contourArea(c) > 500]
        # Largest first — accept the first one shaped like a display panel.
        for candidate in sorted(candidates, key=cv2.contourArea, reverse=True):
            x, y, w, h = cv2.boundingRect(candidate)
            # Typical display: wider than tall (1.5–4.0) and not tiny.
            if 1.5 <= w / h <= 4.0 and w > 50 and h > 30:
                # Pad the crop by 40 px per side, clamped to the frame bounds.
                x, y = max(0, x - 40), max(0, y - 40)
                w = min(w + 80, img.shape[1] - x)
                h = min(h + 80, img.shape[0] - y)
                return img[y:y + h, x:x + w]
        return img
    except Exception as e:
        logging.error(f"ROI detection failed: {str(e)}")
        return img
|
46 |
|
def correct_seven_segment(text, bbox):
    """Correct common seven-segment misreads based on bounding box shape.

    Parameters
    ----------
    text : str
        Raw OCR text for one detection.
    bbox : sequence of four (x, y) corner points in EasyOCR order:
        top-left, top-right, bottom-right, bottom-left.

    Returns
    -------
    str
        ``text`` with every "6" replaced by "2" when the detection box is
        wide relative to its height (seven-segment "2" reads wider than
        "6"); otherwise ``text`` unchanged.
    """
    if "6" in text:
        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
        width = abs(x2 - x1)  # top edge: top-left -> top-right
        # BUG FIX: height must span vertically (top-left -> bottom-left).
        # The previous abs(y2 - y1) compared the two *top* corners, which is
        # ~0 for an axis-aligned box, so the height > 0 guard kicked in,
        # aspect_ratio defaulted to 1.0 (> 0.5), and every "6" was
        # unconditionally rewritten to "2".
        height = abs(y4 - y1)
        aspect_ratio = width / height if height > 0 else 1.0
        # "2" typically has a more rectangular (wider) shape in seven-segment.
        if aspect_ratio > 0.5:  # threshold tuned for typical "2" vs "6" shapes
            text = text.replace("6", "2")
    return text
|
60 |
def enhance_image(img, mode="standard"):
|
61 |
"""Enhance image with different modes for multi-scale processing"""
|
62 |
try:
|
63 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
64 |
|
65 |
if mode == "seven_segment":
|
66 |
+
# Minimal preprocessing for seven-segment displays
|
67 |
+
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
68 |
+
return thresh
|
69 |
+
elif mode == "minimal":
|
70 |
+
# Very light preprocessing
|
71 |
+
denoised = cv2.GaussianBlur(gray, (3, 3), 0)
|
72 |
_, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
73 |
+
return thresh
|
74 |
elif mode == "high_contrast":
|
75 |
denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
|
76 |
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
|
|
84 |
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
85 |
thresh = clahe.apply(denoised)
|
86 |
|
87 |
+
if mode not in ["seven_segment", "minimal"]:
|
88 |
thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
89 |
cv2.THRESH_BINARY, 11, 2)
|
90 |
|
|
|
92 |
kernel = np.ones((3, 3), np.uint8)
|
93 |
morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
|
94 |
|
95 |
+
# Skip sharpening for seven-segment and minimal modes
|
96 |
+
if mode not in ["seven_segment", "minimal"]:
|
97 |
+
brightness = estimate_brightness(img)
|
98 |
+
sharpen_strength = 3 if brightness > 100 else 5
|
99 |
+
sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
|
100 |
+
morphed = cv2.filter2D(morphed, -1, sharpen_kernel)
|
101 |
|
102 |
# Dynamic resizing
|
103 |
+
h, w = morphed.shape
|
104 |
target_size = 800
|
105 |
scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
|
106 |
if scale_factor != 1.0:
|
107 |
+
morphed = cv2.resize(morphed, None, fx=scale_factor, fy=scale_factor,
|
108 |
+
interpolation=cv2.INTER_CUBIC if scale_factor > 1 else cv2.INTER_AREA)
|
109 |
|
110 |
+
return morphed
|
111 |
except Exception as e:
|
112 |
logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
|
113 |
return img
|
|
|
119 |
|
120 |
# Estimate brightness for adaptive thresholding
|
121 |
brightness = estimate_brightness(img)
|
122 |
+
conf_threshold = 0.7 if brightness > 100 else 0.5 # Stricter for bright displays
|
123 |
|
124 |
# Detect ROI
|
125 |
roi_img = detect_roi(img)
|
126 |
|
127 |
# Process multiple image versions
|
128 |
images_to_process = [
|
129 |
+
("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'}),
|
130 |
+
("minimal", enhance_image(roi_img, mode="minimal"), {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'}),
|
131 |
+
("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
|
132 |
+
("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
|
133 |
+
("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5}),
|
134 |
+
("original", roi_img, {'contrast_ths': 0.2, 'adjust_contrast': 0.5, 'allowlist': '0123456789.'})
|
135 |
]
|
136 |
|
137 |
best_weight = None
|
|
|
143 |
results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
|
144 |
|
145 |
for (bbox, text, conf) in results:
|
146 |
+
# Apply seven-segment correction
|
147 |
+
text = correct_seven_segment(text, bbox)
|
148 |
original_text = text
|
149 |
text = text.lower().strip()
|
150 |
|
|
|
157 |
text = text.replace("b", "8").replace("B", "8")
|
158 |
text = text.replace("z", "2").replace("Z", "2")
|
159 |
text = text.replace("q", "9").replace("Q", "9")
|
|
|
160 |
text = text.replace("kgs", "").replace("kg", "").replace("k", "")
|
161 |
text = re.sub(r"[^\d\.]", "", text)
|
162 |
|
|
|
166 |
weight = float(text)
|
167 |
# Score based on realistic weight range (0.1–500 kg)
|
168 |
range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
|
169 |
+
# Strongly prefer two-digit weights for scales
|
170 |
+
digit_score = 1.5 if 10 <= weight < 100 else 1.0
|
171 |
score = conf * range_score * digit_score
|
172 |
if score > best_score and conf > conf_threshold:
|
173 |
best_weight = text
|