AutoWeightLogger2

Sleeping

App Files Files Community

Sanjayraju30 committed 13 days ago

Commit

ef265f2

verified ·

1 Parent(s): 58fea44

Update ocr_engine.py

Browse files

Files changed (1) hide show

ocr_engine.py +44 -371

ocr_engine.py CHANGED Viewed

@@ -2,306 +2,54 @@ import pytesseract
 import numpy as np
 import cv2
 import re
-import logging
-from datetime import datetime
-import os
 from PIL import Image
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# Directory for debug images
-DEBUG_DIR = "debug_images"
-os.makedirs(DEBUG_DIR, exist_ok=True)
-def save_debug_image(img, filename_suffix, prefix=""):
-    """Save image to debug directory with timestamp."""
-    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
-    filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
-    if isinstance(img, Image.Image):
-        img.save(filename)
-    elif len(img.shape) == 3:
-        cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
-    else:
-        cv2.imwrite(filename, img)
-    logging.info(f"Saved debug image: {filename}")
-def estimate_brightness(img):
-    """Estimate image brightness."""
-    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    return np.mean(gray)
 def preprocess_image(img):
-    """Preprocess image with simplified, robust contrast enhancement."""
-    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    brightness = estimate_brightness(img)
-    # Apply mild CLAHE for contrast
-    clahe_clip = 8.0 if brightness < 90 else 4.0
-    clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
-    enhanced = clahe.apply(gray)
-    save_debug_image(enhanced, "01_preprocess_clahe")
-    # Light blur to reduce noise
-    blurred = cv2.GaussianBlur(enhanced, (5, 5), 0)
-    save_debug_image(blurred, "02_preprocess_blur")
-    # Dynamic thresholding with larger block size for small displays
-    block_size = max(7, min(31, int(img.shape[0] / 20) * 2 + 1))
-    thresh = cv2.adaptiveThreshold(
-        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-        cv2.THRESH_BINARY_INV, block_size, 3
-    )
-    # Minimal morphological operations
-    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
-    thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
-    save_debug_image(thresh, "03_preprocess_morph")
-    return thresh, enhanced
-def correct_rotation(img):
-    """Correct image rotation using edge detection."""
     try:
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        edges = cv2.Canny(gray, 30, 100, apertureSize=3)
-        lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=25, minLineLength=15, maxLineGap=10)
-        if lines is not None:
-            angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
-            angle = np.median(angles)
-            if abs(angle) > 0.3:
-                h, w = img.shape[:2]
-                center = (w // 2, h // 2)
-                M = cv2.getRotationMatrix2D(center, angle, 1.0)
-                img = cv2.warpAffine(img, M, (w, h))
-                save_debug_image(img, "00_rotated_image")
-                logging.info(f"Applied rotation: {angle:.2f} degrees")
-        return img
-    except Exception as e:
-        logging.error(f"Rotation correction failed: {str(e)}")
-        return img
-def detect_roi(img):
-    """Detect region of interest with broader contour analysis."""
-    try:
-        save_debug_image(img, "04_original")
-        thresh, enhanced = preprocess_image(img)
-        brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        block_sizes = [max(7, min(31, int(img.shape[0] / s) * 2 + 1)) for s in [5, 10, 20]]
-        valid_contours = []
-        img_area = img.shape[0] * img.shape[1]
-        for block_size in block_sizes:
-            temp_thresh = cv2.adaptiveThreshold(
-                enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                cv2.THRESH_BINARY_INV, block_size, 3
-            )
-            kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
-            temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
-            save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
-            contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-            for c in contours:
-                area = cv2.contourArea(c)
-                x, y, w, h = cv2.boundingRect(c)
-                roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
-                aspect_ratio = w / h
-                if (50 < area < (img_area * 0.95) and
-                    0.05 <= aspect_ratio <= 20.0 and w > 20 and h > 8 and roi_brightness > 15):
-                    valid_contours.append((c, area * roi_brightness))
-                    logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
-        if valid_contours:
-            contour, _ = max(valid_contours, key=lambda x: x[1])
-            x, y, w, h = cv2.boundingRect(contour)
-            padding = max(5, min(20, int(min(w, h) * 0.4)))
-            x, y = max(0, x - padding), max(0, y - padding)
-            w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
-            roi_img = img[y:y+h, x:x+w]
-            save_debug_image(roi_img, "06_detected_roi")
-            logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
-            return roi_img, (x, y, w, h)
-        logging.info("No ROI found, using full image.")
-        save_debug_image(img, "06_no_roi_fallback")
-        return img, None
-    except Exception as e:
-        logging.error(f"ROI detection failed: {str(e)}")
-        save_debug_image(img, "06_roi_error_fallback")
-        return img, None
-def detect_digit_template(digit_img, brightness):
-    """Digit recognition with expanded template matching."""
-    try:
-        h, w = digit_img.shape
-        if h < 5 or w < 2:
-            logging.debug("Digit image too small for template matching.")
-            return None
-        # Expanded digit templates for seven-segment display variations
-        digit_templates = {
-            '0': [
-                np.array([[1, 1, 1, 1, 1],
-                          [1, 0, 0, 0, 1],
-                          [1, 0, 0, 0, 1],
-                          [1, 0, 0, 0, 1],
-                          [1, 1, 1, 1, 1]], dtype=np.float32),
-                np.array([[1, 1, 1, 1],
-                          [1, 0, 0, 1],
-                          [1, 0, 0, 1],
-                          [1, 0, 0, 1],
-                          [1, 1, 1, 1]], dtype=np.float32)
-            ],
-            '1': [
-                np.array([[0, 0, 1, 0, 0],
-                          [0, 0, 1, 0, 0],
-                          [0, 0, 1, 0, 0],
-                          [0, 0, 1, 0, 0],
-                          [0, 0, 1, 0, 0]], dtype=np.float32),
-                np.array([[0, 1, 0],
-                          [0, 1, 0],
-                          [0, 1, 0],
-                          [0, 1, 0],
-                          [0, 1, 0]], dtype=np.float32)
-            ],
-            '2': [
-                np.array([[1, 1, 1, 1, 1],
-                          [0, 0, 0, 1, 1],
-                          [1, 1, 1, 1, 1],
-                          [1, 1, 0, 0, 0],
-                          [1, 1, 1, 1, 1]], dtype=np.float32),
-                np.array([[1, 1, 1, 1],
-                          [0, 0, 1, 1],
-                          [1, 1, 1, 1],
-                          [1, 1, 0, 0],
-                          [1, 1, 1, 1]], dtype=np.float32)
-            ],
-            '3': [
-                np.array([[1, 1, 1, 1, 1],
-                          [0, 0, 0, 1, 1],
-                          [1, 1, 1, 1, 1],
-                          [0, 0, 0, 1, 1],
-                          [1, 1, 1, 1, 1]], dtype=np.float32),
-                np.array([[1, 1, 1, 1],
-                          [0, 0, 1, 1],
-                          [1, 1, 1, 1],
-                          [0, 0, 1, 1],
-                          [1, 1, 1, 1]], dtype=np.float32)
-            ],
-            '4': [
-                np.array([[1, 1, 0, 0, 1],
-                          [1, 1, 0, 0, 1],
-                          [1, 1, 1, 1, 1],
-                          [0, 0, 0, 0, 1],
-                          [0, 0, 0, 0, 1]], dtype=np.float32),
-                np.array([[1, 0, 0, 1],
-                          [1, 0, 0, 1],
-                          [1, 1, 1, 1],
-                          [0, 0, 0, 1],
-                          [0, 0, 0, 1]], dtype=np.float32)
-            ],
-            '5': [
-                np.array([[1, 1, 1, 1, 1],
-                          [1, 1, 0, 0, 0],
-                          [1, 1, 1, 1, 1],
-                          [0, 0, 0, 1, 1],
-                          [1, 1, 1, 1, 1]], dtype=np.float32),
-                np.array([[1, 1, 1, 1],
-                          [1, 1, 0, 0],
-                          [1, 1, 1, 1],
-                          [0, 0, 1, 1],
-                          [1, 1, 1, 1]], dtype=np.float32)
-            ],
-            '6': [
-                np.array([[1, 1, 1, 1, 1],
-                          [1, 1, 0, 0, 0],
-                          [1, 1, 1, 1, 1],
-                          [1, 0, 0, 1, 1],
-                          [1, 1, 1, 1, 1]], dtype=np.float32),
-                np.array([[1, 1, 1, 1],
-                          [1, 1, 0, 0],
-                          [1, 1, 1, 1],
-                          [1, 0, 1, 1],
-                          [1, 1, 1, 1]], dtype=np.float32)
-            ],
-            '7': [
-                np.array([[1, 1, 1, 1, 1],
-                          [0, 0, 0, 0, 1],
-                          [0, 0, 0, 0, 1],
-                          [0, 0, 0, 0, 1],
-                          [0, 0, 0, 0, 1]], dtype=np.float32),
-                np.array([[1, 1, 1, 1],
-                          [0, 0, 0, 1],
-                          [0, 0, 0, 1],
-                          [0, 0, 0, 1],
-                          [0, 0, 0, 1]], dtype=np.float32)
-            ],
-            '8': [
-                np.array([[1, 1, 1, 1, 1],
-                          [1, 0, 0, 0, 1],
-                          [1, 1, 1, 1, 1],
-                          [1, 0, 0, 0, 1],
-                          [1, 1, 1, 1, 1]], dtype=np.float32),
-                np.array([[1, 1, 1, 1],
-                          [1, 0, 0, 1],
-                          [1, 1, 1, 1],
-                          [1, 0, 0, 1],
-                          [1, 1, 1, 1]], dtype=np.float32)
-            ],
-            '9': [
-                np.array([[1, 1, 1, 1, 1],
-                          [1, 0, 0, 0, 1],
-                          [1, 1, 1, 1, 1],
-                          [0, 0, 0, 1, 1],
-                          [1, 1, 1, 1, 1]], dtype=np.float32),
-                np.array([[1, 1, 1, 1],
-                          [1, 0, 0, 1],
-                          [1, 1, 1, 1],
-                          [0, 0, 1, 1],
-                          [1, 1, 1, 1]], dtype=np.float32)
-            ],
-            '.': [
-                np.array([[0, 0, 0],
-                          [0, 1, 0],
-                          [0, 0, 0]], dtype=np.float32),
-                np.array([[0, 0],
-                          [1, 0],
-                          [0, 0]], dtype=np.float32)
-            ]
-        }
-        # Try multiple sizes for digit image
-        sizes = [(5, 5), (4, 4), (3, 3)] if h > w else [(3, 3), (2, 2)]
-        best_match, best_score = None, -1
-        for size in sizes:
-            digit_img_resized = cv2.resize(digit_img, size, interpolation=cv2.INTER_AREA)
-            digit_img_resized = (digit_img_resized > 100).astype(np.float32)  # Binarize
-            for digit, templates in digit_templates.items():
-                for template in templates:
-                    if digit == '.' and size[0] > 3:
-                        continue
-                    if digit != '.' and size[0] <= 3:
-                        continue
-                    if template.shape[0] != size[0] or template.shape[1] != size[1]:
-                        continue
-                    result = cv2.matchTemplate(digit_img_resized, template, cv2.TM_CCOEFF_NORMED)
-                    _, max_val, _, _ = cv2.minMaxLoc(result)
-                    if max_val > 0.55 and max_val > best_score:  # Further lowered threshold
-                        best_score = max_val
-                        best_match = digit
-        logging.debug(f"Template match: {best_match}, Score: {best_score:.2f}")
-        return best_match if best_score > 0.55 else None
     except Exception as e:
-        logging.error(f"Template digit detection failed: {str(e)}")
-        return None
-def perform_ocr(img, roi_bbox):
-    """Perform OCR with Tesseract and robust template fallback."""
     try:
-        thresh, enhanced = preprocess_image(img)
-        brightness = estimate_brightness(img)
-        pil_img = Image.fromarray(enhanced)
-        save_debug_image(pil_img, "07_ocr_input")
         # Try multiple Tesseract configurations
         configs = [
@@ -309,8 +57,10 @@ def perform_ocr(img, roi_bbox):
             r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'   # Block of text
         ]
         for config in configs:
-            text = pytesseract.image_to_string(pil_img, config=config)
             logging.info(f"Tesseract raw output (config {config}): {text}")
             text = re.sub(r"[^\d\.]", "", text)
             if text.count('.') > 1:
                 text = text.replace('.', '', text.count('.') - 1)
@@ -318,91 +68,14 @@ def perform_ocr(img, roi_bbox):
             if text and re.fullmatch(r"^\d*\.?\d*$", text):
                 text = text.lstrip('0') or '0'
                 confidence = 95.0 if len(text.replace('.', '')) >= 3 else 90.0
-                logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
-                return text, confidence
-        # Fallback to template-based detection
-        logging.info("Tesseract failed, using template-based detection.")
-        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-        digits_info = []
-        for c in contours:
-            x, y, w, h = cv2.boundingRect(c)
-            if w > 4 and h > 5 and 0.03 <= w/h <= 4.0:
-                digits_info.append((x, x+w, y, y+h))
-        if digits_info:
-            digits_info.sort(key=lambda x: x[0])
-            recognized_text = ""
-            prev_x_max = -float('inf')
-            for idx, (x_min, x_max, y_min, y_max) in enumerate(digits_info):
-                x_min, y_min = max(0, x_min), max(0, y_min)
-                x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
-                if x_max <= x_min or y_max <= y_min:
-                    continue
-                digit_crop = thresh[y_min:y_max, x_min:x_max]
-                save_debug_image(digit_crop, f"08_digit_crop_{idx}")
-                digit = detect_digit_template(digit_crop, brightness)
-                if digit:
-                    recognized_text += digit
-                elif x_min - prev_x_max < 10 and prev_x_max != -float('inf'):
-                    recognized_text += '.'
-                prev_x_max = x_max
-            text = re.sub(r"[^\d\.]", "", recognized_text)
-            if text.count('.') > 1:
-                text = text.replace('.', '', text.count('.') - 1)
-            text = text.strip('.')
-            if text and re.fullmatch(r"^\d*\.?\d*$", text):
-                text = text.lstrip('0') or '0'
-                confidence = 90.0 if len(text.replace('.', '')) >= 3 else 85.0
-                logging.info(f"Validated template text: {text}, Confidence: {confidence:.2f}%")
-                return text, confidence
-        logging.info("No valid digits detected.")
-        return None, 0.0
-    except Exception as e:
-        logging.error(f"OCR failed: {str(e)}")
-        return None, 0.0
-def extract_weight_from_image(pil_img):
-    """Extract weight from any digital scale image."""
-    try:
-        img = np.array(pil_img)
-        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-        save_debug_image(img, "00_input_image")
-        img = correct_rotation(img)
-        brightness = estimate_brightness(img)
-        conf_threshold = 0.65 if brightness > 70 else 0.45
-        # Try ROI-based detection
-        roi_img, roi_bbox = detect_roi(img)
-        if roi_bbox:
-            conf_threshold *= 1.15 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.05) else 1.0
-        result, confidence = perform_ocr(roi_img, roi_bbox)
-        if result and confidence >= conf_threshold * 100:
-            try:
-                weight = float(result)
-                if 0.001 <= weight <= 5000:
-                    logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
-                    return result, confidence
-                logging.warning(f"Weight {result} out of range.")
-            except ValueError:
-                logging.warning(f"Invalid weight format: {result}")
-        # Full image fallback
-        logging.info("Primary OCR failed, using full image fallback.")
-        result, confidence = perform_ocr(img, None)
-        if result and confidence >= conf_threshold * 0.85 * 100:
-            try:
-                weight = float(result)
-                if 0.001 <= weight <= 5000:
-                    logging.info(f"Full image weight: {result} kg, Confidence: {confidence:.2f}%")
-                    return result, confidence
-                logging.warning(f"Full image weight {result} out of range.")
-            except ValueError:
-                logging.warning(f"Invalid full image weight format: {result}")
         logging.info("No valid weight detected.")
         return "Not detected", 0.0
     except Exception as e:

 import numpy as np
 import cv2
 import re
 from PIL import Image
+import logging
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 def preprocess_image(img):
+    """Preprocess image for robust OCR."""
     try:
+        # Convert to OpenCV format
+        img = np.array(img)
+        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+        # Convert to grayscale
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        # Estimate brightness for adaptive processing
+        brightness = np.mean(gray)
+        # Apply CLAHE for contrast enhancement
+        clahe_clip = 4.0 if brightness < 100 else 2.0
+        clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
+        enhanced = clahe.apply(gray)
+        # Apply adaptive thresholding
+        block_size = max(11, min(31, int(img.shape[0] / 20) * 2 + 1))
+        thresh = cv2.adaptiveThreshold(
+            enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, block_size, 2
+        )
+        # Noise reduction
+        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
+        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
+        return thresh
     except Exception as e:
+        logging.error(f"Preprocessing failed: {str(e)}")
+        return img
+def extract_weight_from_image(pil_img):
+    """Extract weight from any digital scale image."""
     try:
+        # Convert PIL image to OpenCV
+        img = np.array(pil_img)
+        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+        # Preprocess image
+        thresh = preprocess_image(img)
         # Try multiple Tesseract configurations
         configs = [
             r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'   # Block of text
         ]
         for config in configs:
+            text = pytesseract.image_to_string(thresh, config=config)
             logging.info(f"Tesseract raw output (config {config}): {text}")
+            # Clean and validate text
             text = re.sub(r"[^\d\.]", "", text)
             if text.count('.') > 1:
                 text = text.replace('.', '', text.count('.') - 1)
             if text and re.fullmatch(r"^\d*\.?\d*$", text):
                 text = text.lstrip('0') or '0'
                 confidence = 95.0 if len(text.replace('.', '')) >= 3 else 90.0
+                try:
+                    weight = float(text)
+                    if 0.001 <= weight <= 5000:
+                        logging.info(f"Detected weight: {text} kg, Confidence: {confidence:.2f}%")
+                        return text, confidence
+                except ValueError:
+                    logging.warning(f"Invalid weight format: {text}")
         logging.info("No valid weight detected.")
         return "Not detected", 0.0
     except Exception as e: