AutoWeightLogger1

Running

App Files Community

Sanjayraju30 committed 1 day ago

Commit

9f46cc7

verified ·

1 Parent(s): 6ae35d6

Update ocr_engine.py

Browse files

Files changed (1) hide show

ocr_engine.py +58 -104

ocr_engine.py CHANGED Viewed

@@ -1,16 +1,18 @@
-import easyocr
 import numpy as np
 import cv2
 import re
 import logging
 from datetime import datetime
 import os
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# Initialize EasyOCR
-easyocr_reader = easyocr.Reader(['en'], gpu=False)
 # Directory for debug images
 DEBUG_DIR = "debug_images"
@@ -20,7 +22,9 @@ def save_debug_image(img, filename_suffix, prefix=""):
     """Save image to debug directory with timestamp."""
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
     filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
-    if len(img.shape) == 3:
         cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
     else:
         cv2.imwrite(filename, img)
@@ -34,30 +38,36 @@ def estimate_brightness(img):
 def preprocess_image(img):
     """Preprocess image for OCR with enhanced contrast and noise reduction."""
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    # Apply Gaussian blur to reduce noise
-    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
-    save_debug_image(blurred, "01_preprocess_blur")
-    # Use adaptive histogram equalization for better contrast
-    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
-    enhanced = clahe.apply(blurred)
-    save_debug_image(enhanced, "02_preprocess_clahe")
-    # Morphological operations to enhance digits
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
-    morphed = cv2.morphologyEx(enhanced, cv2.MORPH_CLOSE, kernel)
-    save_debug_image(morphed, "03_preprocess_morph")
-    return morphed
 def correct_rotation(img):
     """Correct image rotation using edge detection."""
     try:
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
-        edges = cv2.Canny(blurred, 50, 150)
         lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=50, minLineLength=30, maxLineGap=10)
         if lines is not None:
             angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
             angle = np.median(angles)
-            if abs(angle) > 1.5:
                 h, w = img.shape[:2]
                 center = (w // 2, h // 2)
                 M = cv2.getRotationMatrix2D(center, angle, 1.0)
@@ -73,17 +83,8 @@ def detect_roi(img):
     """Detect region of interest (display) with refined contour filtering."""
     try:
         save_debug_image(img, "04_original")
-        preprocessed = preprocess_image(img)
         brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        # Dynamic block size based on image dimensions
-        block_size = max(11, min(31, int(img.shape[0] / 20) * 2 + 1))
-        thresh = cv2.adaptiveThreshold(preprocessed, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
-                                       cv2.THRESH_BINARY_INV, block_size, 2)
-        save_debug_image(thresh, "05_roi_threshold")
-        # Morphological operations to connect digit segments
-        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
-        thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
-        save_debug_image(thresh, "06_roi_morph")
         contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         if contours:
@@ -94,101 +95,54 @@ def detect_roi(img):
                 x, y, w, h = cv2.boundingRect(c)
                 roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
                 aspect_ratio = w / h
-                # Relaxed constraints for ROI detection
-                if (100 < area < (img_area * 0.9) and
-                    0.3 <= aspect_ratio <= 20.0 and w > 40 and h > 15 and roi_brightness > 20):
                     valid_contours.append((c, area * roi_brightness))
                     logging.debug(f"Contour: Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
             if valid_contours:
                 contour, _ = max(valid_contours, key=lambda x: x[1])
                 x, y, w, h = cv2.boundingRect(contour)
-                # Dynamic padding based on ROI size
-                padding = max(10, min(50, int(min(w, h) * 0.2)))
                 x, y = max(0, x - padding), max(0, y - padding)
                 w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
                 roi_img = img[y:y+h, x:x+w]
-                save_debug_image(roi_img, "07_detected_roi")
                 logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
                 return roi_img, (x, y, w, h)
         logging.info("No ROI found, using full image.")
-        save_debug_image(img, "07_no_roi_fallback")
         return img, None
     except Exception as e:
         logging.error(f"ROI detection failed: {str(e)}")
-        save_debug_image(img, "07_roi_error_fallback")
         return img, None
-def perform_ocr(img, roi_bbox):
-    """Perform OCR optimized for digital displays."""
     try:
-        preprocessed = preprocess_image(img)
-        brightness = estimate_brightness(img)
-        # Dynamic thresholding based on brightness
-        thresh_value = 0 if brightness < 50 else (127 if brightness < 100 else 200)
-        _, thresh = cv2.threshold(preprocessed, thresh_value, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
-        save_debug_image(thresh, "08_ocr_threshold")
-        # Morphological operations to clean up digits
-        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
-        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
-        save_debug_image(thresh, "09_ocr_morph")
-        # Optimized EasyOCR parameters for seven-segment displays
-        results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
-                                          contrast_ths=0.1, adjust_contrast=1.5,
-                                          text_threshold=0.2, mag_ratio=3.0,
-                                          allowlist='0123456789.', batch_size=1, y_ths=0.2)
-        logging.info(f"EasyOCR results: {results}")
-        if not results:
-            logging.info("No text detected, trying fallback parameters.")
-            results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
-                                              contrast_ths=0.05, adjust_contrast=2.0,
-                                              text_threshold=0.1, mag_ratio=4.0,
-                                              allowlist='0123456789.', batch_size=1, y_ths=0.2)
-            save_debug_image(thresh, "09_fallback_threshold")
-        if not results:
-            logging.info("No digits found.")
-            return None, 0.0
-        digits_info = []
-        for (bbox, text, conf) in results:
-            (x1, y1), (x2, y2), (x3, y3), (x4, y4) = bbox
-            h_bbox = max(y1, y2, y3, y4) - min(y1, y2, y3, y4)
-            if (text.isdigit() or text == '.') and h_bbox > 5 and conf > 0.1:
-                x_min, x_max = int(min(x1, x4)), int(max(x2, x3))
-                y_min, y_max = int(min(y1, y2)), int(max(y3, y4))
-                digits_info.append((x_min, x_max, y_min, y_max, text, conf))
-        if not digits_info:
-            logging.info("No valid digits after filtering.")
-            return None, 0.0
-        digits_info.sort(key=lambda x: x[0])
-        recognized_text = ""
-        total_conf = 0.0
-        conf_count = 0
-        for _, _, _, _, char, conf in digits_info:
-            recognized_text += char
-            total_conf += conf
-            conf_count += 1
-        avg_conf = total_conf / conf_count if conf_count > 0 else 0.0
-        logging.info(f"Recognized text: {recognized_text}, Average confidence: {avg_conf:.2f}")
-        # Validate and clean the recognized text
-        text = re.sub(r"[^\d\.]", "", recognized_text)
         if text.count('.') > 1:
             text = text.replace('.', '', text.count('.') - 1)
         text = text.strip('.')
         if text and re.fullmatch(r"^\d*\.?\d*$", text):
             text = text.lstrip('0') or '0'
-            if text == '0' and avg_conf < 0.9:
-                avg_conf *= 0.7
-            return text, avg_conf * 100
-        logging.info(f"Text '{recognized_text}' failed validation.")
         return None, 0.0
     except Exception as e:
         logging.error(f"OCR failed: {str(e)}")
@@ -202,13 +156,13 @@ def extract_weight_from_image(pil_img):
         save_debug_image(img, "00_input_image")
         img = correct_rotation(img)
         brightness = estimate_brightness(img)
-        conf_threshold = 0.5 if brightness > 120 else (0.3 if brightness > 60 else 0.2)
         roi_img, roi_bbox = detect_roi(img)
         if roi_bbox:
-            conf_threshold *= 1.1 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.4) else 1.0
-        result, confidence = perform_ocr(roi_img, roi_bbox)
         if result and confidence >= conf_threshold * 100:
             try:
                 weight = float(result)
@@ -220,8 +174,8 @@ def extract_weight_from_image(pil_img):
                 logging.warning(f"Invalid weight format: {result}")
         logging.info("Primary OCR failed, using full image fallback.")
-        result, confidence = perform_ocr(img, None)
-        if result and confidence >= conf_threshold * 0.8 * 100:
             try:
                 weight = float(result)
                 if 0.00001 <= weight <= 10000:

 import numpy as np
 import cv2
 import re
 import logging
 from datetime import datetime
 import os
+from PIL import Image
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Initialize TrOCR
+processor = TrOCRProcessor.from_pretrained("microsoft/trocr-small-printed")
+model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-small-printed")
 # Directory for debug images
 DEBUG_DIR = "debug_images"
     """Save image to debug directory with timestamp."""
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
     filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
+    if isinstance(img, Image.Image):
+        img.save(filename)
+    elif len(img.shape) == 3:
         cv2.imwrite(filename, cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
     else:
         cv2.imwrite(filename, img)
 def preprocess_image(img):
     """Preprocess image for OCR with enhanced contrast and noise reduction."""
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    # Dynamic contrast adjustment based on brightness
+    brightness = estimate_brightness(img)
+    clahe_clip = 4.0 if brightness < 100 else 2.0
+    clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
+    enhanced = clahe.apply(gray)
+    save_debug_image(enhanced, "01_preprocess_clahe")
+    # Gaussian blur to reduce noise
+    blurred = cv2.GaussianBlur(enhanced, (3, 3), 0)
+    save_debug_image(blurred, "02_preprocess_blur")
+    # Adaptive thresholding for digit segmentation
+    block_size = max(11, min(31, int(img.shape[0] / 20) * 2 + 1))
+    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                   cv2.THRESH_BINARY_INV, block_size, 2)
+    # Morphological operations to clean up digits
     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
+    thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
+    thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
+    save_debug_image(thresh, "03_preprocess_morph")
+    return thresh, enhanced
 def correct_rotation(img):
     """Correct image rotation using edge detection."""
     try:
         gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        edges = cv2.Canny(gray, 50, 150)
         lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=50, minLineLength=30, maxLineGap=10)
         if lines is not None:
             angles = [np.arctan2(line[0][3] - line[0][1], line[0][2] - line[0][0]) * 180 / np.pi for line in lines]
             angle = np.median(angles)
+            if abs(angle) > 1.0:
                 h, w = img.shape[:2]
                 center = (w // 2, h // 2)
                 M = cv2.getRotationMatrix2D(center, angle, 1.0)
     """Detect region of interest (display) with refined contour filtering."""
     try:
         save_debug_image(img, "04_original")
+        thresh, enhanced = preprocess_image(img)
         brightness_map = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
         contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         if contours:
                 x, y, w, h = cv2.boundingRect(c)
                 roi_brightness = np.mean(brightness_map[y:y+h, x:x+w])
                 aspect_ratio = w / h
+                # Relaxed constraints for digital displays
+                if (200 < area < (img_area * 0.8) and
+                    0.5 <= aspect_ratio <= 15.0 and w > 50 and h > 20 and roi_brightness > 30):
                     valid_contours.append((c, area * roi_brightness))
                     logging.debug(f"Contour: Area={area}, Aspect={aspect_ratio:.2f}, Brightness={roi_brightness:.2f}")
             if valid_contours:
                 contour, _ = max(valid_contours, key=lambda x: x[1])
                 x, y, w, h = cv2.boundingRect(contour)
+                padding = max(15, min(50, int(min(w, h) * 0.3)))
                 x, y = max(0, x - padding), max(0, y - padding)
                 w, h = min(w + 2 * padding, img.shape[1] - x), min(h + 2 * padding, img.shape[0] - y)
                 roi_img = img[y:y+h, x:x+w]
+                save_debug_image(roi_img, "05_detected_roi")
                 logging.info(f"Detected ROI: ({x}, {y}, {w}, {h})")
                 return roi_img, (x, y, w, h)
         logging.info("No ROI found, using full image.")
+        save_debug_image(img, "05_no_roi_fallback")
         return img, None
     except Exception as e:
         logging.error(f"ROI detection failed: {str(e)}")
+        save_debug_image(img, "05_roi_error_fallback")
         return img, None
+def perform_ocr(img):
+    """Perform OCR using TrOCR for digital displays."""
     try:
+        # Convert to PIL for TrOCR
+        pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
+        save_debug_image(pil_img, "06_ocr_input")
+        # Process image with TrOCR
+        pixel_values = processor(pil_img, return_tensors="pt").pixel_values
+        generated_ids = model.generate(pixel_values)
+        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        logging.info(f"TrOCR raw output: {text}")
+        # Clean and validate text
+        text = re.sub(r"[^\d\.]", "", text)
         if text.count('.') > 1:
             text = text.replace('.', '', text.count('.') - 1)
         text = text.strip('.')
         if text and re.fullmatch(r"^\d*\.?\d*$", text):
             text = text.lstrip('0') or '0'
+            confidence = 95.0 if len(text.replace('.', '')) > 1 else 90.0
+            logging.info(f"Validated text: {text}, Confidence: {confidence:.2f}%")
+            return text, confidence
+        logging.info(f"Text '{text}' failed validation.")
         return None, 0.0
     except Exception as e:
         logging.error(f"OCR failed: {str(e)}")
         save_debug_image(img, "00_input_image")
         img = correct_rotation(img)
         brightness = estimate_brightness(img)
+        conf_threshold = 0.6 if brightness > 100 else 0.4
         roi_img, roi_bbox = detect_roi(img)
         if roi_bbox:
+            conf_threshold *= 1.2 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.3) else 1.0
+        result, confidence = perform_ocr(roi_img)
         if result and confidence >= conf_threshold * 100:
             try:
                 weight = float(result)
                 logging.warning(f"Invalid weight format: {result}")
         logging.info("Primary OCR failed, using full image fallback.")
+        result, confidence = perform_ocr(img)
+        if result and confidence >= conf_threshold * 0.9 * 100:
             try:
                 weight = float(result)
                 if 0.00001 <= weight <= 10000: