AutoWeightLogger1

Sleeping

App Files Files Community

Sanjayraju30 committed 22 days ago

Commit

ded0d50

verified ·

1 Parent(s): 013fbf8

Update ocr_engine.py

Browse files

Files changed (1) hide show

ocr_engine.py +177 -69

ocr_engine.py CHANGED Viewed

@@ -1,24 +1,21 @@
 import numpy as np
 import cv2
 import re
 import logging
 from datetime import datetime
 import os
-from PIL import Image
-from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# Initialize TrOCR with error handling
 try:
-    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-small-printed")
-    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-small-printed")
-    logging.info("TrOCR model and processor loaded successfully")
 except Exception as e:
-    logging.error(f"Failed to load TrOCR model: {str(e)}")
-    processor = None
-    model = None
 # Directory for debug images
 DEBUG_DIR = "debug_images"
@@ -28,9 +25,7 @@ def save_debug_image(img, filename_suffix, prefix=""):
     """Save image to debug directory with timestamp."""
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
     filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
-    if isinstance(img, Image.Image):
-        img.save(filename)
-    elif len(img.shape) == 3:
         cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
     else:
         cv2.imwrite(filename, img)
@@ -44,23 +39,23 @@ def estimate_brightness(img):
 def preprocess_image(img):
     """Preprocess image for OCR with enhanced contrast and noise reduction."""
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    # Dynamic contrast adjustment based on brightness
     brightness = estimate_brightness(img)
-    clahe_clip = 4.0 if brightness < 100 else 2.0
     clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
     enhanced = clahe.apply(gray)
     save_debug_image(enhanced, "01_preprocess_clahe")
     # Gaussian blur to reduce noise
     blurred = cv2.GaussianBlur(enhanced, (3, 3), 0)
     save_debug_image(blurred, "02_preprocess_blur")
-    # Adaptive thresholding for digit segmentation
-    block_size = max(11, min(31, int(img.shape[0] / 20) * 2 + 1))
     thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                   cv2.THRESH_BINARY_INV, block_size, 2)
-    # Morphological operations to clean up digits
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
     thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
-    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
     save_debug_image(thresh, "03_preprocess_morph")
     return thresh, enhanced
@@ -68,7 +63,7 @@ def correct_rotation(img):
     """Correct image rotation using edge detection."""
     try:
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        edges = cv2.Canny(gray, 50, 150)
         lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=50, minLineLength=30, maxLineGap=10)
         if lines is not None:
             angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
@@ -86,72 +81,185 @@ def correct_rotation(img):
         return img
 def detect_roi(img):
-    """Detect region of interest (display) with refined contour filtering."""
     try:
         save_debug_image(img, "04_original")
         thresh, enhanced = preprocess_image(img)
         brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-        if contours:
-            img_area = img.shape[0] * img.shape[1]
-            valid_contours = []
             for c in contours:
                 area = cv2.contourArea(c)
                 x, y, w, h = cv2.boundingRect(c)
                 roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
                 aspect_ratio = w / h
-                # Relaxed constraints for digital displays
-                if (200 < area < (img_area * 0.8) and
-                    0.5 <= aspect_ratio <= 15.0 and w > 50 and h > 20 and roi_brightness > 30):
                     valid_contours.append((c, area * roi_brightness))
-                    logging.debug(f"Contour: Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
-            if valid_contours:
-                contour, _ = max(valid_contours, key=lambda x: x[1])
-                x, y, w, h = cv2.boundingRect(contour)
-                padding = max(15, min(50, int(min(w, h) * 0.3)))
-                x, y = max(0, x - padding), max(0, y - padding)
-                w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
-                roi_img = img[y:y+h, x:x+w]
-                save_debug_image(roi_img, "05_detected_roi")
-                logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
-                return roi_img, (x, y, w, h)
         logging.info("No ROI found, using full image.")
-        save_debug_image(img, "05_no_roi_fallback")
         return img, None
     except Exception as e:
         logging.error(f"ROI detection failed: {str(e)}")
-        save_debug_image(img, "05_roi_error_fallback")
         return img, None
-def perform_ocr(img):
-    """Perform OCR using TrOCR for digital displays."""
-    if processor is None or model is None:
-        logging.error("TrOCR model not loaded, cannot perform OCR.")
         return None, 0.0
     try:
-        # Convert to PIL for TrOCR
-        pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
-        save_debug_image(pil_img, "06_ocr_input")
-        # Process image with TrOCR
-        pixel_values = processor(pil_img, return_tensors="pt").pixel_values
-        generated_ids = model.generate(pixel_values, max_length=10)
-        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        logging.info(f"TrOCR raw output: {text}")
-        # Clean and validate text
-        text = re.sub(r"[^\d\.]", "", text)
-        if text.count('.') > 1:
-            text = text.replace('.', '', text.count('.') - 1)
-        text = text.strip('.')
-        if text and re.fullmatch(r"^\d*\.?\d*$", text):
-            text = text.lstrip('0') or '0'
-            confidence = 95.0 if len(text.replace('.', '')) >= 2 else 85.0
-            logging.info(f"Validated text: {text}, Confidence: {confidence:.2f}%")
-            return text, confidence
-        logging.info(f"Text '{text}' failed validation.")
         return None, 0.0
     except Exception as e:
         logging.error(f"OCR failed: {str(e)}")
@@ -171,11 +279,11 @@ def extract_weight_from_image(pil_img):
         if roi_bbox:
             conf_threshold *= 1.1 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.3) else 1.0
-        result, confidence = perform_ocr(roi_img)
         if result and confidence >= conf_threshold * 100:
             try:
                 weight = float(result)
-                if 0.01 <= weight <= 1000:  # Narrowed range for typical scale weights
                     logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
                     return result, confidence
                 logging.warning(f"Weight {result} out of range.")
@@ -183,7 +291,7 @@ def extract_weight_from_image(pil_img):
                 logging.warning(f"Invalid weight format: {result}")
         logging.info("Primary OCR failed, using full image fallback.")
-        result, confidence = perform_ocr(img)
         if result and confidence >= conf_threshold * 0.9 * 100:
             try:
                 weight = float(result)

+import easyocr
 import numpy as np
 import cv2
 import re
 import logging
 from datetime import datetime
 import os
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Initialize EasyOCR
 try:
+    easyocr_reader = easyocr.Reader(['en'], gpu=False)
+    logging.info("EasyOCR initialized successfully")
 except Exception as e:
+    logging.error(f"Failed to initialize EasyOCR: {str(e)}")
+    easyocr_reader = None
 # Directory for debug images
 DEBUG_DIR = "debug_images"
     """Save image to debug directory with timestamp."""
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
     filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
+    if len(img.shape) == 3:
         cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
     else:
         cv2.imwrite(filename, img)
 def preprocess_image(img):
     """Produce a binarised mask and a contrast-enhanced grayscale image.

     The input is converted from BGR to grayscale, equalised with CLAHE,
     lightly blurred, adaptively thresholded (inverted binary output) and
     cleaned with a morphological open followed by a close. Each stage is
     written out through ``save_debug_image``.

     Returns:
         A ``(thresh, enhanced)`` tuple: the cleaned binary mask and the
         CLAHE-equalised grayscale image.
     """
     grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

     # Frames whose estimated brightness is below 80 get the stronger clip limit.
     if estimate_brightness(img) < 80:
         clip_limit = 4.0
     else:
         clip_limit = 2.0
     equalizer = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8, 8))
     enhanced = equalizer.apply(grayscale)
     save_debug_image(enhanced, "01_preprocess_clahe")

     # A small Gaussian kernel suppresses pixel noise before thresholding.
     smoothed = cv2.GaussianBlur(enhanced, (3, 3), 0)
     save_debug_image(smoothed, "02_preprocess_blur")

     # The threshold window scales with image height; it is always odd and
     # clamped to the range 11..31.
     odd_window = int(img.shape[0] / 15) * 2 + 1
     block_size = max(11, min(31, odd_window))
     mask = cv2.adaptiveThreshold(smoothed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                  cv2.THRESH_BINARY_INV, block_size, 5)

     # Open once to drop specks, then close twice to bridge gaps in strokes.
     structuring = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
     mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, structuring, iterations=1)
     mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, structuring, iterations=2)
     save_debug_image(mask, "03_preprocess_morph")

     return mask, enhanced
     """Correct image rotation using edge detection."""
     try:
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        edges = cv2.Canny(gray, 50, 150, apertureSize=3)
         lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=50, minLineLength=30, maxLineGap=10)
         if lines is not None:
             angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
         return img
 def detect_roi(img):
     """Locate the display region by contour filtering at several threshold scales.

     The CLAHE-enhanced image from ``preprocess_image`` is adaptively
     thresholded at up to three block sizes derived from the image height.
     Contours from every scale are filtered by area, aspect ratio, minimum
     width/height and mean grayscale brightness; the surviving contour with
     the largest ``area * brightness`` product is padded and cropped.

     Args:
         img: BGR image (it is passed to ``cv2.cvtColor`` with
             ``COLOR_BGR2GRAY``).

     Returns:
         ``(roi_img, (x, y, w, h))`` when a candidate is found, otherwise
         ``(img, None)``. Any exception is logged and also yields
         ``(img, None)``.
     """
     try:
         save_debug_image(img, "04_original")
         # Only the contrast-enhanced image is needed here; the binary mask
         # returned alongside it is not used for ROI detection.
         _, enhanced = preprocess_image(img)
         brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
         img_h, img_w = img.shape[0], img.shape[1]
         img_area = img_h * img_w

         # The three divisors frequently clamp to the same block size (always
         # for tall images, where all of them hit the upper bound of 31).
         # De-duplicate while keeping order so identical thresholding passes
         # and identical debug images are not produced more than once.
         block_sizes = list(dict.fromkeys(
             max(11, min(31, int(img_h / s) * 2 + 1)) for s in (15, 20, 25)
         ))
         # The closing kernel does not depend on the block size.
         kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))

         valid_contours = []
         for block_size in block_sizes:
             temp_thresh = cv2.adaptiveThreshold(enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                                 cv2.THRESH_BINARY_INV, block_size, 5)
             temp_thresh = cv2.morphologyEx(temp_thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
             save_debug_image(temp_thresh, f"05_roi_threshold_block{block_size}")
             contours, _ = cv2.findContours(temp_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
             for c in contours:
                 area = cv2.contourArea(c)
                 x, y, w, h = cv2.boundingRect(c)
                 roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
                 aspect_ratio = w / h
                 if (300 < area < (img_area * 0.7) and
                         0.5 <= aspect_ratio <= 10.0 and w > 60 and h > 25 and roi_brightness > 40):
                     valid_contours.append((c, area * roi_brightness))
                     logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")

         if valid_contours:
             # Prefer the largest, brightest candidate.
             contour, _ = max(valid_contours, key=lambda candidate: candidate[1])
             x, y, w, h = cv2.boundingRect(contour)
             # Pad by 30% of the shorter side, bounded to 20..60 px, and clip
             # the padded box to the image.
             padding = max(20, min(60, int(min(w, h) * 0.3)))
             x, y = max(0, x - padding), max(0, y - padding)
             w, h = min(w + 2 * padding, img_w - x), min(h + 2 * padding, img_h - y)
             roi_img = img[y:y+h, x:x+w]
             save_debug_image(roi_img, "06_detected_roi")
             logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
             return roi_img, (x, y, w, h)

         logging.info("No ROI found, using full image.")
         save_debug_image(img, "06_no_roi_fallback")
         return img, None
     except Exception as e:
         logging.error(f"ROI detection failed: {str(e)}")
         save_debug_image(img, "06_roi_error_fallback")
         return img, None
# Segments that are lit for each digit on a seven-segment display.
_SEVEN_SEGMENT_PATTERNS = {
    '0': frozenset({'top', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'}),
    '1': frozenset({'right_top', 'right_bottom'}),
    '2': frozenset({'top', 'middle', 'bottom', 'left_bottom', 'right_top'}),
    '3': frozenset({'top', 'middle', 'bottom', 'right_top', 'right_bottom'}),
    '4': frozenset({'middle', 'left_top', 'right_top', 'right_bottom'}),
    '5': frozenset({'top', 'middle', 'bottom', 'left_top', 'right_bottom'}),
    '6': frozenset({'top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_bottom'}),
    '7': frozenset({'top', 'right_top', 'right_bottom'}),
    '8': frozenset({'top', 'middle', 'bottom', 'left_top', 'left_bottom', 'right_top', 'right_bottom'}),
    '9': frozenset({'top', 'middle', 'bottom', 'left_top', 'right_top', 'right_bottom'}),
}


def detect_segments(digit_img, brightness):
    """Classify a single seven-segment digit from a binary crop.

    The crop is divided into seven fixed regions (expressed as fractions of
    its width and height). A region counts as lit when the share of pixels
    equal to 255 exceeds a threshold, and the lit set is compared against
    the canonical pattern of every digit.

    Args:
        digit_img: 2-D array (unpacked as ``h, w``) in which lit pixels have
            the value 255.
        brightness: Scalar brightness estimate; values below 80 select the
            lower fill threshold (0.2 instead of 0.3).

    Returns:
        The best-matching digit as a one-character string, or ``None`` when
        the crop is smaller than 10x5, when no segment is lit, or when an
        exception occurs (the exception is logged).
    """
    try:
        h, w = digit_img.shape
        if h < 10 or w < 5:
            logging.debug("Digit image too small for segment detection.")
            return None
        # Dynamic segment threshold based on brightness
        segment_threshold = 0.2 if brightness < 80 else 0.3
        # Each entry is (x1, x2, y1, y2) in pixel coordinates of the crop.
        segments = {
            'top': (int(w*0.1), int(w*0.9), 0, int(h*0.25)),
            'middle': (int(w*0.1), int(w*0.9), int(h*0.45), int(h*0.55)),
            'bottom': (int(w*0.1), int(w*0.9), int(h*0.75), h),
            'left_top': (0, int(w*0.3), int(h*0.1), int(h*0.5)),
            'left_bottom': (0, int(w*0.3), int(h*0.5), int(h*0.9)),
            'right_top': (int(w*0.7), w, int(h*0.1), int(h*0.5)),
            'right_bottom': (int(w*0.7), w, int(h*0.5), int(h*0.9)),
        }
        segment_presence = {}
        for name, (x1, x2, y1, y2) in segments.items():
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(w, x2), min(h, y2)
            region = digit_img[y1:y2, x1:x2]
            if region.size == 0:
                segment_presence[name] = False
                continue
            pixel_count = np.sum(region == 255)
            total_pixels = region.size
            segment_presence[name] = pixel_count / total_pixels > segment_threshold
            logging.debug(f"Segment {name}: {pixel_count}/{total_pixels} = {pixel_count/total_pixels:.2f}")

        lit = {name for name, present in segment_presence.items() if present}
        if not lit:
            # A crop with no lit segment is not a digit; previously this was
            # reported as '0' because every pattern scored zero.
            logging.debug(f"Segment detection: {segment_presence}, no lit segments.")
            return None

        best_match, best_score = None, float('-inf')
        for digit, pattern in _SEVEN_SEGMENT_PATTERNS.items():
            matches = len(lit & pattern)
            missing = len(pattern - lit)
            extras = len(lit - pattern)
            # Missing segments must cost as much as a match gains, otherwise a
            # pattern that is a superset of the lit set ties with the exact
            # pattern (a clean '7' was read as '3', a clean '9' as '8').
            score = matches - missing - 0.2 * extras
            if matches >= len(pattern) * 0.6:
                score += 1.0
            if score > best_score:
                best_score = score
                best_match = digit
        logging.debug(f"Segment detection: {segment_presence}, Digit: {best_match}, Score: {best_score:.2f}")
        return best_match
    except Exception as e:
        logging.error(f"Segment detection failed: {str(e)}")
        return None
+def perform_ocr(img, roi_bbox):
+    """Perform OCR with EasyOCR and seven-segment fallback."""
+    if easyocr_reader is None:
+        logging.error("EasyOCR not initialized, cannot perform OCR.")
         return None, 0.0
     try:
+        thresh, enhanced = preprocess_image(img)
+        brightness = estimate_brightness(img)
+        # Dynamic EasyOCR parameters
+        results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
+                                          contrast_ths=0.1, adjust_contrast=1.5,
+                                          text_threshold=0.3, mag_ratio=3.0,
+                                          allowlist='0123456789.', batch_size=1, y_ths=0.2)
+        save_debug_image(thresh, "07_ocr_threshold")
+        logging.info(f"EasyOCR results: {results}")
+        if not results:
+            logging.info("EasyOCR failed, trying fallback parameters.")
+            results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
+                                              contrast_ths=0.05, adjust_contrast=2.0,
+                                              text_threshold=0.2, mag_ratio=4.0,
+                                              allowlist='0123456789.', batch_size=1, y_ths=0.2)
+            save_debug_image(thresh, "07_fallback_threshold")
+        digits_info = []
+        for (bbox, text, conf) in results:
+            (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
+            h_bbox = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
+            if (text.isdigit() or text == '.') and h_bbox > 10 and conf > 0.2:
+                x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
+                y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
+                digits_info.append((x_min, x_max, y_min, y_max, text, conf))
+        if digits_info:
+            digits_info.sort(key=lambda x: x[0])
+            recognized_text = ""
+            total_conf = 0.0
+            conf_count = 0
+            for idx, (x_min, x_max, y_min, y_max, char, conf) in enumerate(digits_info):
+                x_min, y_min = max(0, x_min), max(0, y_min)
+                x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
+                if x_max <= x_min or y_max <= y_min:
+                    continue
+                if conf < 0.7 and char != '.':
+                    digit_crop = thresh[y_min:y_max, x_min:x_max]
+                    save_debug_image(digit_crop, f"08_digit_crop_{idx}_{char}")
+                    segment_digit = detect_segments(digit_crop, brightness)
+                    if segment_digit:
+                        recognized_text += segment_digit
+                        total_conf += 0.85
+                        logging.debug(f"Used segment detection for char {char}: {segment_digit}")
+                    else:
+                        recognized_text += char
+                        total_conf += conf
+                    conf_count += 1
+                else:
+                    recognized_text += char
+                    total_conf += conf
+                    conf_count += 1
+            avg_conf = total_conf / conf_count if conf_count > 0 else 0.0
+            text = re.sub(r"[^\d\.]", "", recognized_text)
+            if text.count('.') > 1:
+                text = text.replace('.', '', text.count('.') - 1)
+            text = text.strip('.')
+            if text and re.fullmatch(r"^\d*\.?\d*$", text):
+                text = text.lstrip('0') or '0'
+                logging.info(f"Validated text: {text}, Confidence: {avg_conf:.2f}")
+                return text, avg_conf * 100
+        logging.info("No valid digits detected.")
         return None, 0.0
     except Exception as e:
         logging.error(f"OCR failed: {str(e)}")
         if roi_bbox:
             conf_threshold *= 1.1 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.3) else 1.0
+        result, confidence = perform_ocr(roi_img, roi_bbox)
         if result and confidence >= conf_threshold * 100:
             try:
                 weight = float(result)
+                if 0.01 <= weight <= 1000:
                     logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
                     return result, confidence
                 logging.warning(f"Weight {result} out of range.")
                 logging.warning(f"Invalid weight format: {result}")
         logging.info("Primary OCR failed, using full image fallback.")
+        result, confidence = perform_ocr(img, None)
         if result and confidence >= conf_threshold * 0.9 * 100:
             try:
                 weight = float(result)