AutoWeightLogger1

Running

App Files Community

Sanjayraju30 committed 1 day ago

Commit

e58b1c2

verified ·

1 Parent(s): 1d1e3da

Update ocr_engine.py

Browse files

Files changed (1) hide show

ocr_engine.py +59 -77

ocr_engine.py CHANGED Viewed

@@ -1,22 +1,15 @@
-import easyocr
 import numpy as np
 import cv2
 import re
 import logging
 from datetime import datetime
 import os
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# Initialize EasyOCR
-try:
-    easyocr_reader = easyocr.Reader(['en'], gpu=False)
-    logging.info("EasyOCR initialized successfully")
-except Exception as e:
-    logging.error(f"Failed to initialize EasyOCR: {str(e)}")
-    easyocr_reader = None
 # Directory for debug images
 DEBUG_DIR = "debug_images"
 os.makedirs(DEBUG_DIR, exist_ok=True)
@@ -25,7 +18,9 @@ def save_debug_image(img, filename_suffix, prefix=""):
     """Save image to debug directory with timestamp."""
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
     filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
-    if len(img.shape) == 3:
         cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
     else:
         cv2.imwrite(filename, img)
@@ -40,19 +35,19 @@ def preprocess_image(img):
     """Preprocess image for OCR with enhanced contrast and noise reduction."""
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     brightness = estimate_brightness(img)
-    # Dynamic CLAHE based on brightness
-    clahe_clip = 4.0 if brightness < 80 else 2.0
     clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
     enhanced = clahe.apply(gray)
     save_debug_image(enhanced, "01_preprocess_clahe")
-    # Gaussian blur to reduce noise
     blurred = cv2.GaussianBlur(enhanced, (3, 3), 0)
     save_debug_image(blurred, "02_preprocess_blur")
-    # Adaptive thresholding with dynamic block size
     block_size = max(11, min(31, int(img.shape[0] / 15) * 2 + 1))
     thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY_INV, block_size, 5)
-    # Morphological operations to enhance digits
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
     thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
     thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
@@ -86,8 +81,7 @@ def detect_roi(img):
         save_debug_image(img, "04_original")
         thresh, enhanced = preprocess_image(img)
         brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        # Try multiple block sizes for robust ROI detection
-        block_sizes = [max(11, min(31, int(img.shape[0] / s) * 2 + 1)) for s in [15, 20, 25]]
         valid_contours = []
         img_area = img.shape[0] * img.shape[1]
@@ -104,15 +98,15 @@ def detect_roi(img):
                 x, y, w, h = cv2.boundingRect(c)
                 roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
                 aspect_ratio = w / h
-                if (300 < area < (img_area * 0.7) and
-                    0.5 <= aspect_ratio <= 10.0 and w > 60 and h > 25 and roi_brightness > 40):
                     valid_contours.append((c, area * roi_brightness))
                     logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
         if valid_contours:
             contour, _ = max(valid_contours, key=lambda x: x[1])
             x, y, w, h = cv2.boundingRect(contour)
-            padding = max(20, min(60, int(min(w, h) * 0.3)))
             x, y = max(0, x - padding), max(0, y - padding)
             w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
             roi_img = img[y:y+h, x:x+w]
@@ -132,12 +126,11 @@ def detect_segments(digit_img, brightness):
     """Detect seven-segment digits with adaptive thresholds."""
     try:
         h, w = digit_img.shape
-        if h < 10 or w < 5:
             logging.debug("Digit image too small for segment detection.")
             return None
-        # Dynamic segment threshold based on brightness
-        segment_threshold = 0.2 if brightness < 80 else 0.3
         segments = {
             'top': (int(w*0.1), int(w*0.9), 0, int(h*0.25)),
             'middle': (int(w*0.1), int(w*0.9), int(h*0.45), int(h*0.55)),
@@ -178,9 +171,9 @@ def detect_segments(digit_img, brightness):
         for digit, pattern in digit_patterns.items():
             matches = sum(1 for segment in pattern if segment_presence.get(segment, False))
             non_matches = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
-            score = matches - 0.2 * non_matches
-            if matches >= len(pattern) * 0.6:
-                score += 1.0
             if score > best_score:
                 best_score = score
                 best_match = digit
@@ -191,74 +184,63 @@ def detect_segments(digit_img, brightness):
         return None
 def perform_ocr(img, roi_bbox):
-    """Perform OCR with EasyOCR and seven-segment fallback."""
-    if easyocr_reader is None:
-        logging.error("EasyOCR not initialized, cannot perform OCR.")
-        return None, 0.0
     try:
         thresh, enhanced = preprocess_image(img)
         brightness = estimate_brightness(img)
-        # Dynamic EasyOCR parameters
-        results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
-                                          contrast_ths=0.1, adjust_contrast=1.5,
-                                          text_threshold=0.3, mag_ratio=3.0,
-                                          allowlist='0123456789.', batch_size=1, y_ths=0.2)
-        save_debug_image(thresh, "07_ocr_threshold")
-        logging.info(f"EasyOCR results: {results}")
-        if not results:
-            logging.info("EasyOCR failed, trying fallback parameters.")
-            results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
-                                              contrast_ths=0.05, adjust_contrast=2.0,
-                                              text_threshold=0.2, mag_ratio=4.0,
-                                              allowlist='0123456789.', batch_size=1, y_ths=0.2)
-            save_debug_image(thresh, "07_fallback_threshold")
         digits_info = []
-        for (bbox, text, conf) in results:
-            (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
-            h_bbox = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
-            if (text.isdigit() or text == '.') and h_bbox > 10 and conf > 0.2:
-                x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
-                y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
-                digits_info.append((x_min, x_max, y_min, y_max, text, conf))
         if digits_info:
             digits_info.sort(key=lambda x: x[0])
             recognized_text = ""
-            total_conf = 0.0
-            conf_count = 0
-            for idx, (x_min, x_max, y_min, y_max, char, conf) in enumerate(digits_info):
                 x_min, y_min = max(0, x_min), max(0, y_min)
                 x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
                 if x_max <= x_min or y_max <= y_min:
                     continue
-                if conf < 0.7 and char != '.':
-                    digit_crop = thresh[y_min:y_max, x_min:x_max]
-                    save_debug_image(digit_crop, f"08_digit_crop_{idx}_{char}")
-                    segment_digit = detect_segments(digit_crop, brightness)
-                    if segment_digit:
-                        recognized_text += segment_digit
-                        total_conf += 0.85
-                        logging.debug(f"Used segment detection for char {char}: {segment_digit}")
-                    else:
-                        recognized_text += char
-                        total_conf += conf
-                    conf_count += 1
-                else:
-                    recognized_text += char
-                    total_conf += conf
-                    conf_count += 1
-            avg_conf = total_conf / conf_count if conf_count > 0 else 0.0
             text = re.sub(r"[^\d\.]", "", recognized_text)
             if text.count('.') > 1:
                 text = text.replace('.', '', text.count('.') - 1)
             text = text.strip('.')
             if text and re.fullmatch(r"^\d*\.?\d*$", text):
                 text = text.lstrip('0') or '0'
-                logging.info(f"Validated text: {text}, Confidence: {avg_conf:.2f}")
-                return text, avg_conf * 100
         logging.info("No valid digits detected.")
         return None, 0.0
     except Exception as e:
@@ -273,7 +255,7 @@ def extract_weight_from_image(pil_img):
         save_debug_image(img, "00_input_image")
         img = correct_rotation(img)
         brightness = estimate_brightness(img)
-        conf_threshold = 0.7 if brightness > 100 else 0.5
         roi_img, roi_bbox = detect_roi(img)
         if roi_bbox:

+import pytesseract
 import numpy as np
 import cv2
 import re
 import logging
 from datetime import datetime
 import os
+from PIL import Image
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # Directory for debug images
 DEBUG_DIR = "debug_images"
 os.makedirs(DEBUG_DIR, exist_ok=True)
     """Save image to debug directory with timestamp."""
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
     filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
+    if isinstance(img, Image.Image):
+        img.save(filename)
+    elif len(img.shape) == 3:
         cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
     else:
         cv2.imwrite(filename, img)
     """Preprocess image for OCR with enhanced contrast and noise reduction."""
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     brightness = estimate_brightness(img)
+    # Dynamic CLAHE
+    clahe_clip = 5.0 if brightness < 80 else 3.0
     clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
     enhanced = clahe.apply(gray)
     save_debug_image(enhanced, "01_preprocess_clahe")
+    # Gaussian blur
     blurred = cv2.GaussianBlur(enhanced, (3, 3), 0)
     save_debug_image(blurred, "02_preprocess_blur")
+    # Dynamic thresholding
     block_size = max(11, min(31, int(img.shape[0] / 15) * 2 + 1))
     thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY_INV, block_size, 5)
+    # Morphological operations
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
     thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
     thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=2)
         save_debug_image(img, "04_original")
         thresh, enhanced = preprocess_image(img)
         brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        block_sizes = [max(11, min(31, int(img.shape[0] / s) * 2 + 1)) for s in [12, 15, 18]]
         valid_contours = []
         img_area = img.shape[0] * img.shape[1]
                 x, y, w, h = cv2.boundingRect(c)
                 roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
                 aspect_ratio = w / h
+                if (400 < area < (img_area * 0.6) and
+                    0.5 <= aspect_ratio <= 8.0 and w > 70 and h > 30 and roi_brightness > 50):
                     valid_contours.append((c, area * roi_brightness))
                     logging.debug(f"Contour (block {block_size}): Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
         if valid_contours:
             contour, _ = max(valid_contours, key=lambda x: x[1])
             x, y, w, h = cv2.boundingRect(contour)
+            padding = max(20, min(60, int(min(w, h) * 0.4)))
             x, y = max(0, x - padding), max(0, y - padding)
             w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
             roi_img = img[y:y+h, x:x+w]
     """Detect seven-segment digits with adaptive thresholds."""
     try:
         h, w = digit_img.shape
+        if h < 15 or w < 8:
             logging.debug("Digit image too small for segment detection.")
             return None
+        segment_threshold = 0.25 if brightness < 80 else 0.35
         segments = {
             'top': (int(w*0.1), int(w*0.9), 0, int(h*0.25)),
             'middle': (int(w*0.1), int(w*0.9), int(h*0.45), int(h*0.55)),
         for digit, pattern in digit_patterns.items():
             matches = sum(1 for segment in pattern if segment_presence.get(segment, False))
             non_matches = sum(1 for segment in segment_presence if segment not in pattern and segment_presence[segment])
+            score = matches - 0.15 * non_matches
+            if matches >= len(pattern) * 0.65:
+                score += 1.2
             if score > best_score:
                 best_score = score
                 best_match = digit
         return None
 def perform_ocr(img, roi_bbox):
+    """Perform OCR with Tesseract and seven-segment fallback."""
     try:
         thresh, enhanced = preprocess_image(img)
         brightness = estimate_brightness(img)
+        pil_img = Image.fromarray(enhanced)
+        save_debug_image(pil_img, "07_ocr_input")
+        # Tesseract OCR with numeric config
+        custom_config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
+        text = pytesseract.image_to_string(pil_img, config=custom_config)
+        logging.info(f"Tesseract raw output: {text}")
+        # Clean and validate text
+        text = re.sub(r"[^\d\.]", "", text)
+        if text.count('.') > 1:
+            text = text.replace('.', '', text.count('.') - 1)
+        text = text.strip('.')
+        if text and re.fullmatch(r"^\d*\.?\d*$", text):
+            text = text.lstrip('0') or '0'
+            confidence = 95.0 if len(text.replace('.', '')) >= 2 else 90.0
+            logging.info(f"Validated Tesseract text: {text}, Confidence: {confidence:.2f}%")
+            return text, confidence
+        # Fallback to seven-segment detection
+        logging.info("Tesseract failed, using seven-segment detection.")
+        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         digits_info = []
+        for c in contours:
+            x, y, w, h = cv2.boundingRect(c)
+            if w > 10 and h > 15 and 0.2 <= w/h <= 1.5:
+                digits_info.append((x, x+w, y, y+h))
         if digits_info:
             digits_info.sort(key=lambda x: x[0])
             recognized_text = ""
+            for idx, (x_min, x_max, y_min, y_max) in enumerate(digits_info):
                 x_min, y_min = max(0, x_min), max(0, y_min)
                 x_max, y_max = min(thresh.shape[1], x_max), min(thresh.shape[0], y_max)
                 if x_max <= x_min or y_max <= y_min:
                     continue
+                digit_crop = thresh[y_min:y_max, x_min:x_max]
+                save_debug_image(digit_crop, f"08_digit_crop_{idx}")
+                segment_digit = detect_segments(digit_crop, brightness)
+                if segment_digit:
+                    recognized_text += segment_digit
+                elif idx < len(digits_info) - 1 and (digits_info[idx+1][0] - x_max) < 10:
+                    recognized_text += '.'  # Assume decimal point for close digits
             text = re.sub(r"[^\d\.]", "", recognized_text)
             if text.count('.') > 1:
                 text = text.replace('.', '', text.count('.') - 1)
             text = text.strip('.')
             if text and re.fullmatch(r"^\d*\.?\d*$", text):
                 text = text.lstrip('0') or '0'
+                confidence = 90.0
+                logging.info(f"Validated segment text: {text}, Confidence: {confidence:.2f}%")
+                return text, confidence
         logging.info("No valid digits detected.")
         return None, 0.0
     except Exception as e:
         save_debug_image(img, "00_input_image")
         img = correct_rotation(img)
         brightness = estimate_brightness(img)
+        conf_threshold = 0.8 if brightness > 100 else 0.6
         roi_img, roi_bbox = detect_roi(img)
         if roi_bbox: