logger1

Running

App Files Files Community

Sanjayraju30 committed 12 days ago

Commit

ee68036

verified ·

1 Parent(s): f4861ec

Rename ocr_engine.py to weight_detector.py

Browse files

Files changed (2) hide show

ocr_engine.py +0 -95
weight_detector.py +94 -0

ocr_engine.py DELETED Viewed

@@ -1,95 +0,0 @@
-import easyocr
-import numpy as np
-import cv2
-import re
-from PIL import Image
-import logging
-import sys
-# Set up logging: INFO level, timestamped records, written to stdout via a StreamHandler
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.StreamHandler(sys.stdout)])
-# Initialize EasyOCR reader (once at module level for efficiency); both functions below share it
-reader = easyocr.Reader(['en'], gpu=False)  # GPU=False for CPU-only environments like Hugging Face Spaces
-def preprocess_image(img):
-    """Preprocess image for robust OCR with EasyOCR, optimized for weight displays.
-
-    Converts ``img`` to a NumPy array, reduces it to grayscale, applies CLAHE
-    contrast enhancement and then an inverted Gaussian adaptive threshold.
-
-    Args:
-        img: Input image; converted with ``np.array`` and treated as RGB
-            (presumably a PIL image -- confirm against the caller).
-
-    Returns:
-        The thresholded single-channel image on success. On failure the
-        error is logged and the grayscale image is returned instead.
-    """
-    try:
-        # Convert PIL to OpenCV format
-        img = np.array(img)
-        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
-        # Convert to grayscale
-        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        # Enhance contrast for diverse lighting conditions
-        clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
-        enhanced = clahe.apply(gray)
-        # Apply adaptive thresholding
-        # Block size scales with image height and is clamped to the odd range 11..31
-        block_size = max(11, min(31, int(img.shape[0] / 15) * 2 + 1))
-        thresh = cv2.adaptiveThreshold(
-            enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, block_size, 2
-        )
-        return thresh  # EasyOCR handles further processing
-    except Exception as e:
-        logging.error(f"Preprocessing failed: {str(e)}")
-        # NOTE(review): `gray` is unbound if the failure happened before the
-        # grayscale conversion, in which case this line raises NameError.
-        return gray
-def extract_weight_from_image(pil_img):
-    """Extract weight and unit from a digital scale image using EasyOCR.
-
-    Args:
-        pil_img: Image passed straight to ``preprocess_image``.
-
-    Returns:
-        A ``(weight_str, confidence, unit)`` tuple. ``weight_str`` is the
-        matched number as text, ``confidence`` is the lowest EasyOCR
-        confidence among non-empty results scaled to 0-100, and ``unit`` is
-        the captured unit character or ``"g"`` when none was captured.
-        Returns ``("Not detected", 0.0, "")`` when nothing valid is found or
-        any exception occurs.
-    """
-    try:
-        # Preprocess image
-        thresh = preprocess_image(pil_img)
-        # Convert to numpy array for EasyOCR
-        img_np = np.array(thresh)
-        # Use EasyOCR to detect text
-        results = reader.readtext(img_np, detail=1, paragraph=False)
-        logging.info(f"EasyOCR raw output: {results}")
-        # Extract weight and unit from detected text
-        text = " ".join([result[1] for result in results])  # Combine all detected text
-        text = text.strip().lower()
-        text = re.sub(r'\s+', ' ', text)  # Normalize spaces
-        # Extract weight and unit, prioritizing common formats
-        # NOTE(review): `[kgkg]?` is a character class matching one optional
-        # 'k' or 'g', so the text "kg" is captured as just "k".
-        match = re.search(r'(-?\d*\.?\d+)([kgkg]?)', text)
-        if match:
-            weight_str = match.group(1)
-            unit = match.group(2) if match.group(2) else "g"  # Default to grams if no unit
-            # NOTE(review): group 1 cannot contain a comma, so this replace never changes anything.
-            weight_str = weight_str.replace(',', '.')  # Handle decimal formats (e.g., 68,0)
-            if re.fullmatch(r'^-?\d*\.?\d+$', weight_str):
-                # NOTE(review): stripping leading zeros turns "0.5" into ".5" in the returned string.
-                weight_str = weight_str.lstrip('0') or '0'
-                confidence = min([result[2] for result in results if result[1]] or [0.0]) * 100  # Convert EasyOCR confidence (0-1) to percentage
-                try:
-                    weight = float(weight_str)
-                    # Values outside -5000..5000 are rejected and fall through to "Not detected"
-                    if -5000 <= weight <= 5000:
-                        logging.info(f"Detected weight: {weight} {unit}, Confidence: {confidence:.2f}%")
-                        return weight_str, confidence, unit
-                except ValueError:
-                    logging.warning(f"Invalid weight format: {weight_str}")
-        # Fallback to detect numbers without units if no match
-        # NOTE(review): the unit group in the first pattern is optional, so any
-        # text this pattern matches was already matched above; `not match` is
-        # then False and this branch never runs.
-        match_no_unit = re.search(r'(-?\d*\.?\d+)', text)
-        if match_no_unit and not match:
-            weight_str = match_no_unit.group(1)
-            weight_str = weight_str.replace(',', '.')
-            if re.fullmatch(r'^-?\d*\.?\d+$', weight_str):
-                weight_str = weight_str.lstrip('0') or '0'
-                confidence = min([result[2] for result in results if result[1]] or [0.0]) * 100
-                try:
-                    weight = float(weight_str)
-                    if -5000 <= weight <= 5000:
-                        logging.info(f"Detected weight (no unit): {weight} g, Confidence: {confidence:.2f}%")
-                        return weight_str, confidence, "g"
-                except ValueError:
-                    logging.warning(f"Invalid weight format: {weight_str}")
-        logging.info("No valid weight detected.")
-        return "Not detected", 0.0, ""
-    except Exception as e:
-        # Any failure (preprocessing, OCR, parsing) is logged and reported as "Not detected"
-        logging.error(f"Weight extraction failed: {str(e)}")
-        return "Not detected", 0.0, ""

weight_detector.py ADDED Viewed

	@@ -0,0 +1,94 @@

+import cv2
+import numpy as np
+import easyocr
+import re
+from typing import Tuple, List, Optional
+from PIL import Image, ImageDraw
+class WeightDetector:
+    """Detect weight readings in images with EasyOCR.
+
+    ``detect_weight`` is the main entry point: it runs OCR on an image file,
+    parses every recognised text fragment for a number (optionally followed
+    by a unit) and returns the most confident reading together with an
+    annotated copy of the image.
+    """
+    # Unit spellings accepted after a number. The trailing \b stops a word
+    # that merely starts with a unit letter ("3 gadgets") from counting.
+    _UNITS = r'(kilograms|grams|pounds|lbs|kg|lb|g)\b'
+    # Tried in order: decimal+unit, integer+unit, bare decimal, bare integer.
+    # Both '.' and ',' are accepted as the decimal separator.
+    _WEIGHT_PATTERNS = (
+        re.compile(r'(\d+[.,]\d+)\s*' + _UNITS, re.IGNORECASE),
+        re.compile(r'(\d+)\s*' + _UNITS, re.IGNORECASE),
+        re.compile(r'(\d+[.,]\d+)'),
+        re.compile(r'(\d+)'),
+    )
+    def __init__(self):
+        """Initialize the OCR reader with English language support"""
+        self.reader = easyocr.Reader(['en'])
+    def preprocess_image(self, image_path: str) -> np.ndarray:
+        """Load an image and binarize it for OCR.
+
+        This helper is standalone: ``detect_weight`` does not call it and
+        runs OCR on the unprocessed image instead.
+
+        Args:
+            image_path: Path of the image file to read.
+
+        Returns:
+            Grayscale image after Gaussian adaptive thresholding.
+
+        Raises:
+            ValueError: If OpenCV cannot read the file.
+        """
+        img = cv2.imread(image_path)
+        if img is None:
+            raise ValueError("Could not read image from path")
+        # Convert to grayscale
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+        # Apply adaptive thresholding
+        processed = cv2.adaptiveThreshold(
+            gray, 255,
+            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+            cv2.THRESH_BINARY, 11, 2
+        )
+        return processed
+    def _parse_weight(self, text: str) -> Optional[Tuple[float, Optional[str]]]:
+        """Return ``(value, unit)`` for the first weight-like token in *text*.
+
+        ``unit`` is the lower-cased unit spelling, or ``None`` when the number
+        had no recognised unit. Returns ``None`` when *text* has no digits.
+        """
+        if not text:
+            return None
+        for pattern in self._WEIGHT_PATTERNS:
+            match = pattern.search(text)
+            if match is None:
+                continue
+            # Normalise a decimal comma ("0,5") so float() reads it as 0.5
+            value = float(match.group(1).replace(',', '.'))
+            unit = match.group(2).lower() if pattern.groups >= 2 else None
+            return value, unit
+        return None
+    def extract_weight_value(self, text: str) -> Optional[float]:
+        """Extract weight value from text using regex patterns.
+
+        Handles forms such as ``12.34g``, ``56.78 kg``, ``90 lbs``, ``0,5 kg``
+        and bare numbers. A number followed by a unit is preferred over a
+        bare number appearing elsewhere in the text.
+
+        Args:
+            text: OCR text fragment to scan.
+
+        Returns:
+            The numeric value as a float, or ``None`` if no number is present.
+        """
+        parsed = self._parse_weight(text)
+        return None if parsed is None else parsed[0]
+    def detect_weight(self, image_path: str) -> Tuple[Optional[float], List[dict], Image.Image]:
+        """Detect weight from an image and return value, metadata, and annotated image.
+
+        Args:
+            image_path: Path of the image file to analyse.
+
+        Returns:
+            A tuple ``(best_weight, detections, image)``. ``detections`` is a
+            list of dicts with keys ``weight``, ``unit``, ``text``,
+            ``probability`` and ``bbox``, sorted by descending probability;
+            ``best_weight`` is the first entry's weight. ``image`` is the RGB
+            image with every detection outlined and labelled in red. When
+            nothing is detected the result is ``(None, [], image)``; on any
+            error it is ``(None, [], blank 100x100 white image)``.
+        """
+        try:
+            # Load inside a context manager so the file handle is released promptly
+            with Image.open(image_path) as source:
+                img = source.convert("RGB")
+            img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+            # Perform OCR
+            results = self.reader.readtext(img_cv)
+            # Keep every text fragment that contains a weight-like number
+            detected_weights = []
+            for bbox, text, prob in results:
+                parsed = self._parse_weight(text)
+                if parsed is None:
+                    continue
+                weight, unit = parsed
+                detected_weights.append({
+                    'weight': weight,
+                    'unit': unit,
+                    'text': text,
+                    'probability': prob,
+                    'bbox': bbox,
+                })
+            if not detected_weights:
+                return None, [], img
+            # Sort by probability so the most confident reading comes first
+            detected_weights.sort(key=lambda item: item['probability'], reverse=True)
+            # Draw bounding boxes on image
+            draw = ImageDraw.Draw(img)
+            for item in detected_weights:
+                polygon = [(int(x), int(y)) for x, y in item['bbox']]
+                draw.polygon(polygon, outline="red", width=2)
+                # Label with the unit that was actually read; fall back to "g" when none was found
+                label = f"{item['weight']}{item['unit'] or 'g'} (p={item['probability']:.2f})"
+                # Clamp so labels for boxes at the top edge stay inside the image
+                draw.text((polygon[0][0], max(0, polygon[0][1] - 10)), label, fill="red")
+            return detected_weights[0]['weight'], detected_weights, img
+        except Exception as e:
+            # Best-effort API: report the problem and hand back a placeholder image
+            print(f"Error processing image: {e}")
+            return None, [], Image.new("RGB", (100, 100), color="white")