AutoWeightLogger1

Sleeping

App Files Files Community

Sanjayraju30 committed 26 days ago

Commit

4c95d04

verified ·

1 Parent(s): 12c2109

Update ocr_engine.py

Browse files

Files changed (1) hide show

ocr_engine.py +119 -116

ocr_engine.py CHANGED Viewed

@@ -19,17 +19,13 @@ def detect_roi(img):
     """Detect and crop the region of interest (likely the digital display)"""
     try:
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        # Stricter threshold for bright areas
         brightness = estimate_brightness(img)
         thresh_value = 230 if brightness > 100 else 190
         _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
-        # Morphological operations to connect digits
         kernel = np.ones((9, 9), np.uint8)
         dilated = cv2.dilate(thresh, kernel, iterations=3)
-        # Find contours
         contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         if contours:
-            # Filter contours by size and aspect ratio
             valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
             if valid_contours:
                 for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
@@ -38,124 +34,139 @@ def detect_roi(img):
                     if 1.5 <= aspect_ratio <= 4.0 and w > 50 and h > 30:
                         x, y = max(0, x-40), max(0, y-40)
                         w, h = min(w+80, img.shape[1]-x), min(h+80, img.shape[0]-y)
-                        return img[y:y+h, x:x+w]
-        return img
     except Exception as e:
         logging.error(f"ROI detection failed: {str(e)}")
-        return img
-def correct_seven_segment(text, bbox, img):
-    """Correct common seven-segment misreads based on bounding box and pixel distribution"""
-    if "2" in text or "6" in text:
-        # Extract bounding box coordinates
-        (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
-        x_min, x_max = min(x1, x4), max(x2, x3)
-        y_min, y_max = min(y1, y2), max(y3, y4)
-        # Ensure bounds are within image
-        x_min, y_min = max(0, int(x_min)), max(0, int(y_min))
-        x_max, y_max = min(img.shape[1], int(x_max)), min(img.shape[0], int(y_max))
-        if x_max <= x_min or y_max <= y_min:
-            return text
-        # Crop the digit area
-        digit_area = img[y_min:y_max, x_min:x_max]
-        if digit_area.size == 0:
-            return text
-        # Convert to grayscale and threshold
-        gray = cv2.cvtColor(digit_area, cv2.COLOR_BGR2GRAY)
-        _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-        # Calculate pixel distribution in upper vs lower half
-        h, w = thresh.shape
-        upper_half = thresh[:h//2, :]
-        lower_half = thresh[h//2:, :]
-        upper_pixels = np.sum(upper_half == 255)
-        lower_pixels = np.sum(lower_half == 255)
-        # "6" has more pixels in the lower half due to the loop, "2" is more balanced
-        if lower_pixels > upper_pixels * 1.5:
-            text = text.replace("2", "6")
-        else:
-            text = text.replace("6", "2")
-    return text
-def enhance_image(img, mode="standard"):
-    """Enhance image with different modes for multi-scale processing"""
     try:
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        if mode == "seven_segment":
-            # Extremely minimal preprocessing for seven-segment displays
-            _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-            return thresh
-        elif mode == "minimal":
-            # No blurring, just threshold
-            _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-            return thresh
-        elif mode == "raw":
-            # No preprocessing, just convert to grayscale
-            return gray
-        elif mode == "high_contrast":
-            denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
-            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
-            thresh = clahe.apply(denoised)
-        elif mode == "low_noise":
-            denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
-            clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
-            thresh = clahe.apply(denoised)
-        else:
-            denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
-            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
-            thresh = clahe.apply(denoised)
-        if mode not in ["seven_segment", "minimal", "raw"]:
-            thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                          cv2.THRESH_BINARY, 11, 2)
-        # Morphological operations only for non-seven-segment modes
-        if mode not in ["seven_segment", "minimal", "raw"]:
-            kernel = np.ones((3, 3), np.uint8)
-            morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
-        else:
-            morphed = thresh
-        # Skip sharpening for seven-segment, minimal, and raw modes
-        if mode not in ["seven_segment", "minimal", "raw"]:
-            brightness = estimate_brightness(img)
-            sharpen_strength = 3 if brightness > 100 else 5
-            sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
-            morphed = cv2.filter2D(morphed, -1, sharpen_kernel)
-        # Dynamic resizing
-        h, w = morphed.shape
-        target_size = 800
-        scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
-        if scale_factor != 1.0:
-            morphed = cv2.resize(morphed, None, fx=scale_factor, fy=scale_factor,
-                                interpolation=cv2.INTER_CUBIC if scale_factor > 1 else cv2.INTER_AREA)
-        return morphed
     except Exception as e:
-        logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
-        return img
 def extract_weight_from_image(pil_img):
     try:
         img = np.array(pil_img)
         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-        # Estimate brightness for adaptive thresholding
         brightness = estimate_brightness(img)
-        conf_threshold = 0.8 if brightness > 100 else 0.6  # Stricter for bright displays
         # Detect ROI
-        roi_img = detect_roi(img)
-        # Process multiple image versions
         images_to_process = [
-            ("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
-            ("minimal", enhance_image(roi_img, mode="minimal"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
-            ("raw", enhance_image(roi_img, mode="raw"), {'contrast_ths': 0.15, 'adjust_contrast': 0.7, 'text_threshold': 0.8, 'allowlist': '0123456789.'}),
-            ("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
-            ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
-            ("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1, 'adjust_contrast': 0.5, 'text_threshold': 0.7}),
         ]
         best_weight = None
@@ -163,16 +174,12 @@ def extract_weight_from_image(pil_img):
         best_score = 0.0
         for mode, proc_img, ocr_params in images_to_process:
-            # EasyOCR detection
             results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
             for (bbox, text, conf) in results:
-                # Apply seven-segment correction
-                text = correct_seven_segment(text, bbox, roi_img)
-                original_text = text
                 text = text.lower().strip()
-                # Fix common OCR errors
                 text = text.replace(",", ".").replace(";", ".")
                 text = text.replace("o", "0").replace("O", "0")
                 text = text.replace("s", "5").replace("S", "5")
@@ -184,13 +191,10 @@ def extract_weight_from_image(pil_img):
                 text = text.replace("kgs", "").replace("kg", "").replace("k", "")
                 text = re.sub(r"[^\d\.]", "", text)
-                # Regex for weight (0.0 to 9999.999)
                 if re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
                     try:
                         weight = float(text)
-                        # Score based on realistic weight range (0.1–500 kg)
                         range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
-                        # Prefer two-digit weights for scales
                         digit_score = 1.5 if 10 <= weight < 100 else 1.0
                         score = conf * range_score * digit_score
                         if score > best_score and conf > conf_threshold:
@@ -204,7 +208,6 @@ def extract_weight_from_image(pil_img):
             logging.info("No valid weight detected")
             return "Not detected", 0.0
-        # Format output
         if "." in best_weight:
             int_part, dec_part = best_weight.split(".")
             int_part = int_part.lstrip("0") or "0"

     """Detect and crop the region of interest (likely the digital display)"""
     try:
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
         brightness = estimate_brightness(img)
         thresh_value = 230 if brightness > 100 else 190
         _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
         kernel = np.ones((9, 9), np.uint8)
         dilated = cv2.dilate(thresh, kernel, iterations=3)
         contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         if contours:
             valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
             if valid_contours:
                 for contour in sorted(valid_contours, key=cv2.contourArea, reverse=True):
                     if 1.5 <= aspect_ratio <= 4.0 and w > 50 and h > 30:
                         x, y = max(0, x-40), max(0, y-40)
                         w, h = min(w+80, img.shape[1]-x), min(h+80, img.shape[0]-y)
+                        return img[y:y+h, x:x+w], (x, y, w, h)
+        return img, None
     except Exception as e:
         logging.error(f"ROI detection failed: {str(e)}")
+        return img, None
def detect_segments(digit_img):
    """Classify a binarised digit crop as a seven-segment digit.

    The crop is divided into seven fixed rectangular zones (three horizontal
    bars and four vertical half-strokes). A zone counts as lit when more than
    half of its pixels equal 255. The lit set is then compared with the
    canonical seven-segment pattern of every digit.

    Args:
        digit_img: 2-D array (unpacked as ``h, w = digit_img.shape``) in which
            lit pixels have the value 255.

    Returns:
        The best matching digit as a one-character string, or ``None`` when
        the crop is smaller than 10x10, a zone is empty, or no digit pattern
        has more agreeing than disagreeing segments.
    """
    h, w = digit_img.shape
    if h < 10 or w < 10:
        return None

    # Zone rectangles as (x1, x2, y1, y2), expressed in fifths/halves of the crop.
    segments = {
        'top': (0, w, 0, h // 5),
        'middle': (0, w, 2 * h // 5, 3 * h // 5),
        'bottom': (0, w, 4 * h // 5, h),
        'left_top': (0, w // 5, 0, h // 2),
        'left_bottom': (0, w // 5, h // 2, h),
        'right_top': (4 * w // 5, w, 0, h // 2),
        'right_bottom': (4 * w // 5, w, h // 2, h),
    }

    lit = set()
    for name, (x1, x2, y1, y2) in segments.items():
        region = digit_img[y1:y2, x1:x2]
        if region.size == 0:
            return None
        # A segment is lit when more than 50% of its zone is white.
        if np.sum(region == 255) > region.size * 0.5:
            lit.add(name)

    # Canonical seven-segment patterns.
    digit_patterns = {
        '0': ('top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
        '1': ('right_top', 'right_bottom'),
        '2': ('top', 'middle', 'bottom', 'left_bottom', 'right_top'),
        '3': ('top', 'middle', 'bottom', 'right_top', 'right_bottom'),
        '4': ('middle', 'left_top', 'right_top', 'right_bottom'),
        '5': ('top', 'middle', 'bottom', 'left_top', 'right_bottom'),
        '6': ('top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_bottom'),
        '7': ('top', 'right_top', 'right_bottom'),
        '8': ('top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'),
        '9': ('top', 'middle', 'bottom', 'left_top', 'right_top', 'right_bottom'),
    }

    best_match = None
    best_score = 0
    for digit, pattern in digit_patterns.items():
        expected = set(pattern)
        matched = len(lit & expected)
        extra = len(lit - expected)      # lit, but not part of this digit
        missing = len(expected - lit)    # part of this digit, but not lit
        # Missing segments must be penalised as well: otherwise every pattern
        # that is a superset of the lit set ties with the true digit and the
        # earlier dict entry wins (a clean "1" or "7" came back as "0", and a
        # clean "9" as "8").
        score = matched - extra - missing
        if score > best_score:
            best_score = score
            best_match = digit
    return best_match
def custom_seven_segment_ocr(img, roi_bbox):
    """Read a seven-segment display by classifying each detected text box.

    The image is converted to grayscale and Otsu-thresholded. EasyOCR is used
    only to locate text boxes on the thresholded image; each box is cropped,
    passed to ``detect_segments``, and the resulting digits are concatenated
    left to right.

    Args:
        img: image passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.
        roi_bbox: accepted for the caller's convenience; not used here.

    Returns:
        The recognised number as a string when it matches the weight pattern
        (1-4 integer digits with an optional fraction of up to 3 digits),
        otherwise ``None``. Any exception is logged and also yields ``None``.
    """
    try:
        grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # EasyOCR supplies the box locations; its text output is ignored.
        detections = easyocr_reader.readtext(
            binary,
            detail=1,
            paragraph=False,
            contrast_ths=0.1,
            adjust_contrast=0.7,
            text_threshold=0.9,
            mag_ratio=1.5,
            allowlist='0123456789.',
        )
        if not detections:
            return None

        # Reduce each quadrilateral to (left, right, top, bottom) extents.
        boxes = []
        for quad, _text, _conf in detections:
            (x1, y1), (x2, y2), (x3, y3), (x4, y4) = quad
            boxes.append((min(x1, x4), max(x2, x3), min(y1, y2), max(y3, y4)))
        boxes.sort(key=lambda box: box[0])  # left-to-right reading order

        img_h, img_w = binary.shape[0], binary.shape[1]
        pieces = []
        for left, right, top, bottom in boxes:
            # Clamp the box to the image before cropping.
            left, top = max(0, int(left)), max(0, int(top))
            right, bottom = min(img_w, int(right)), min(img_h, int(bottom))
            if right <= left or bottom <= top:
                continue
            digit = detect_segments(binary[top:bottom, left:right])
            if digit:
                pieces.append(digit)

        # Keep only digits and dots, then validate against the weight pattern.
        candidate = re.sub(r"[^\d\.]", "", "".join(pieces))
        if re.fullmatch(r"\d{1,4}(\.\d{0,3})?", candidate):
            return candidate
        return None
    except Exception as e:
        logging.error(f"Custom seven-segment OCR failed: {str(e)}")
        return None
 def extract_weight_from_image(pil_img):
     try:
         img = np.array(pil_img)
         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
         brightness = estimate_brightness(img)
+        conf_threshold = 0.9 if brightness > 100 else 0.7
         # Detect ROI
+        roi_img, roi_bbox = detect_roi(img)
+        # Try custom seven-segment OCR first
+        custom_result = custom_seven_segment_ocr(roi_img, roi_bbox)
+        if custom_result:
+            # Format the custom result
+            if "." in custom_result:
+                int_part, dec_part = custom_result.split(".")
+                int_part = int_part.lstrip("0") or "0"
+                custom_result = f"{int_part}.{dec_part.rstrip('0')}"
+            else:
+                custom_result = custom_result.lstrip('0') or "0"
+            return custom_result, 100.0  # High confidence for custom OCR
+        # Fallback to EasyOCR if custom OCR fails
         images_to_process = [
+            ("raw", roi_img, {'contrast_ths': 0.1, 'adjust_contrast': 0.7, 'text_threshold': 0.9, 'mag_ratio': 1.5, 'allowlist': '0123456789.'}),
         ]
         best_weight = None
         best_score = 0.0
         for mode, proc_img, ocr_params in images_to_process:
+            if mode == "raw":
+                proc_img = cv2.cvtColor(proc_img, cv2.COLOR_BGR2GRAY)
             results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
             for (bbox, text, conf) in results:
                 text = text.lower().strip()
                 text = text.replace(",", ".").replace(";", ".")
                 text = text.replace("o", "0").replace("O", "0")
                 text = text.replace("s", "5").replace("S", "5")
                 text = text.replace("kgs", "").replace("kg", "").replace("k", "")
                 text = re.sub(r"[^\d\.]", "", text)
                 if re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
                     try:
                         weight = float(text)
                         range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
                         digit_score = 1.5 if 10 <= weight < 100 else 1.0
                         score = conf * range_score * digit_score
                         if score > best_score and conf > conf_threshold:
             logging.info("No valid weight detected")
             return "Not detected", 0.0
         if "." in best_weight:
             int_part, dec_part = best_weight.split(".")
             int_part = int_part.lstrip("0") or "0"