AutoWeightLogger2

Sleeping

App Files Files Community

Sanjayraju30 committed 11 days ago

Commit

b544f2d

verified ·

1 Parent(s): 1a5f8fd

Update ocr_engine.py

Browse files

Files changed (1) hide show

ocr_engine.py +8 -80

ocr_engine.py CHANGED Viewed

@@ -1,83 +1,11 @@
-import pytesseract
-import numpy as np
 import cv2
-import re
 from PIL import Image
-import logging
-# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-def preprocess_image(img):
-    """Preprocess image for robust OCR."""
-    try:
-        # Convert to OpenCV format
-        img = np.array(img)
-        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-        # Convert to grayscale
-        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        # Estimate brightness for adaptive processing
-        brightness = np.mean(gray)
-        # Apply CLAHE for contrast enhancement
-        clahe_clip = 4.0 if brightness < 100 else 2.0
-        clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
-        enhanced = clahe.apply(gray)
-        # Apply adaptive thresholding
-        block_size = max(11, min(31, int(img.shape[0] / 20) * 2 + 1))
-        thresh = cv2.adaptiveThreshold(
-            enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, block_size, 2
-        )
-        # Noise reduction
-        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
-        thresh = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
-        return thresh
-    except Exception as e:
-        logging.error(f"Preprocessing failed: {str(e)}")
-        return img
-def extract_weight_from_image(pil_img):
-    """Extract weight from any digital scale image."""
-    try:
-        # Convert PIL image to OpenCV
-        img = np.array(pil_img)
-        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-        # Preprocess image
-        thresh = preprocess_image(img)
-        # Try multiple Tesseract configurations
-        configs = [
-            r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.',  # Single line
-            r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'   # Block of text
-        ]
-        for config in configs:
-            text = pytesseract.image_to_string(thresh, config=config)
-            logging.info(f"Tesseract raw output (config {config}): {text}")
-            # Clean and validate text
-            text = re.sub(r"[^\d\.]", "", text)
-            if text.count('.') > 1:
-                text = text.replace('.', '', text.count('.') - 1)
-            text = text.strip('.')
-            if text and re.fullmatch(r"^\d*\.?\d*$", text):
-                text = text.lstrip('0') or '0'
-                confidence = 95.0 if len(text.replace('.', '')) >= 3 else 90.0
-                try:
-                    weight = float(text)
-                    if 0.001 <= weight <= 5000:
-                        logging.info(f"Detected weight: {text} kg, Confidence: {confidence:.2f}%")
-                        return text, confidence
-                except ValueError:
-                    logging.warning(f"Invalid weight format: {text}")
-        logging.info("No valid weight detected.")
-        return "Not detected", 0.0
-    except Exception as e:
-        logging.error(f"Weight extraction failed: {str(e)}")
-        return "Not detected", 0.0

 import cv2
+import pytesseract
 from PIL import Image
+import numpy as np
+def extract_weight(img_path):
+    """Extract weight from image path using Tesseract OCR."""
+    img = cv2.imread(img_path)
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    text = pytesseract.image_to_string(gray, config='--psm 7 digits')
+    return ''.join(filter(lambda x: x in '0123456789.', text))