Update ocr_engine.py
ocr_engine.py  CHANGED  +105 -46

@@ -12,36 +12,78 @@ easyocr_reader = easyocr.Reader(['en'], gpu=False)

 def estimate_blur(img):
     """Estimate image blur using Laplacian variance"""
+    try:
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        return cv2.Laplacian(gray, cv2.CV_64F).var()
+    except Exception as e:
+        logging.error(f"Blur estimation failed: {str(e)}")
+        return 100  # Default value for fallback

-def enhance_image(img):
+def detect_roi(img):
+    """Detect and crop the region of interest (likely the digital display)"""
     try:
-        # Convert to grayscale
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        # Adaptive thresholding to handle varying lighting
+        thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                       cv2.THRESH_BINARY_INV, 11, 2)
+        # Dilate to connect text regions
+        kernel = np.ones((5, 5), np.uint8)
+        dilated = cv2.dilate(thresh, kernel, iterations=1)
+        # Find contours
+        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+        if contours:
+            # Get the largest contour with reasonable size
+            valid_contours = [c for c in contours if cv2.contourArea(c) > 1000]
+            if valid_contours:
+                largest_contour = max(valid_contours, key=cv2.contourArea)
+                x, y, w, h = cv2.boundingRect(largest_contour)
+                # Add padding and ensure bounds
+                x, y = max(0, x-20), max(0, y-20)
+                w, h = min(w+40, img.shape[1]-x), min(h+40, img.shape[0]-y)
+                if w > 50 and h > 30:  # Minimum size for valid ROI
+                    return img[y:y+h, x:x+w]
+        return img  # Fallback to original image
+    except Exception as e:
+        logging.error(f"ROI detection failed: {str(e)}")
+        return img

+def enhance_image(img, mode="standard"):
+    """Enhance image with different modes for multi-scale processing"""
+    try:
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+        if mode == "high_contrast":
+            # Stronger denoising and contrast for blurry images
+            denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
+            clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
+        elif mode == "low_noise":
+            # Gentle denoising for clear but noisy images
+            denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
+            clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
+        else:
+            # Standard preprocessing
+            denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
+            clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

-        # CLAHE for contrast enhancement
-        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
         contrast = clahe.apply(denoised)

-        # Adaptive thresholding
+        # Adaptive thresholding
         thresh = cv2.adaptiveThreshold(contrast, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                        cv2.THRESH_BINARY, 11, 2)

-        # Morphological operations
+        # Morphological operations
         kernel = np.ones((3, 3), np.uint8)
         morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)

+        # Adaptive sharpening
+        blur_score = estimate_blur(img)
+        sharpen_strength = 5 if blur_score < 100 else 3
+        sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
         sharpened = cv2.filter2D(morphed, -1, sharpen_kernel)

         # Dynamic resizing
         h, w = sharpened.shape
-        target_size = 800
+        target_size = 800
         scale_factor = min(target_size / max(h, w), 2.0) if max(h, w) < 300 else min(target_size / max(h, w), 1.0)
         if scale_factor != 1.0:
             sharpened = cv2.resize(sharpened, None, fx=scale_factor, fy=scale_factor,
@@ -49,49 +91,66 @@ def enhance_image(img):

         return sharpened
     except Exception as e:
-        logging.error(f"Image enhancement failed: {str(e)}")
-        return img
+        logging.error(f"Image enhancement failed (mode={mode}): {str(e)}")
+        return img

 def extract_weight_from_image(pil_img):
     try:
         img = np.array(pil_img)
         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

-        # Estimate blur
+        # Estimate blur for adaptive thresholding
         blur_score = estimate_blur(img)
+        conf_threshold = 0.35 if blur_score < 100 else 0.55  # Slightly stricter thresholds

+        # Detect ROI
+        roi_img = detect_roi(img)
+
+        # Process multiple image versions
+        images_to_process = [
+            ("standard", enhance_image(roi_img, mode="standard"), {}),
+            ("high_contrast", enhance_image(roi_img, mode="high_contrast"), {}),
+            ("low_noise", enhance_image(roi_img, mode="low_noise"), {}),
+            ("original", roi_img, {'allowlist': '0123456789.'})  # Restrict to digits and decimal
+        ]

-        # Initialize results
         best_weight = None
         best_conf = 0.0
+        best_score = 0.0
+
+        for mode, proc_img, ocr_params in images_to_process:
+            # EasyOCR detection
+            results = easyocr_reader.readtext(proc_img, detail=1, paragraph=False, **ocr_params)
+
+            for (bbox, text, conf) in results:
+                original_text = text
+                text = text.lower().strip()
+
+                # Fix common OCR errors
+                text = text.replace(",", ".").replace(";", ".")
+                text = text.replace("o", "0").replace("O", "0")
+                text = text.replace("s", "5").replace("S", "5")
+                text = text.replace("g", "9").replace("G", "6")
+                text = text.replace("l", "1").replace("I", "1")
+                text = text.replace("b", "8").replace("B", "8")
+                text = text.replace("z", "2").replace("Z", "2")
+                text = text.replace("q", "9").replace("Q", "9")
+                text = text.replace("kgs", "").replace("kg", "").replace("k", "")
+                text = re.sub(r"[^\d\.]", "", text)
+
+                # Regex for weight (0.0 to 9999.999)
+                if re.fullmatch(r"\d{1,4}(\.\d{0,3})?", text):
+                    try:
+                        weight = float(text)
+                        # Score based on realistic weight range (0.1–500 kg)
+                        range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
+                        score = conf * range_score
+                        if score > best_score and conf > conf_threshold:
+                            best_weight = text
+                            best_conf = conf
+                            best_score = score
+                    except ValueError:
+                        continue

         if not best_weight:
             logging.info("No valid weight detected")
@@ -103,7 +162,7 @@ def extract_weight_from_image(pil_img):
             int_part = int_part.lstrip("0") or "0"
             best_weight = f"{int_part}.{dec_part.rstrip('0')}"
         else:
+            best_weight = best_weight.lstrip('0') or "0"

         return best_weight, round(best_conf * 100, 2)
