Spaces:

Sanjayraju30
/

logger

Sleeping

File size: 3,684 Bytes

ee68036

import cv2
import numpy as np
import easyocr
import re
from typing import Tuple, List, Optional
from PIL import Image, ImageDraw

class WeightDetector:
    """Read a weight value out of a photo using EasyOCR.

    ``detect_weight`` is the main entry point: it runs OCR on an image file,
    pulls a number out of every recognised text fragment, and returns the
    number belonging to the most confident fragment together with an
    annotated copy of the image.
    """

    # Unit suffixes, matched case-insensitively. The trailing ``\b`` stops a
    # unit from matching the first letters of an unrelated word
    # ("3 gifts" must not be read as "3 g").
    _UNIT = r'(?:kilograms|grams|pounds|kg|lbs|lb|g)\b'
    # A decimal number. The second alternative accepts a leading-dot decimal
    # (".5") unless the dot is glued to a preceding word character or dot, so
    # that "No.5" is not mistaken for 0.5.
    _DECIMAL = r'(\d+\.\d+|(?<![\w.])\.\d+)'
    _INTEGER = r'(\d+)'

    # Tried in order; the first pattern that matches anywhere in the text
    # wins: decimal+unit, integer+unit, bare decimal, bare integer.
    _WEIGHT_PATTERNS = (
        re.compile(_DECIMAL + r'\s*' + _UNIT, re.IGNORECASE),
        re.compile(_INTEGER + r'\s*' + _UNIT, re.IGNORECASE),
        re.compile(_DECIMAL, re.IGNORECASE),
        re.compile(_INTEGER, re.IGNORECASE),
    )

    def __init__(self):
        """Create the EasyOCR reader with English language support."""
        self.reader = easyocr.Reader(['en'])

    def preprocess_image(self, image_path: str) -> np.ndarray:
        """Load an image and binarise it with adaptive thresholding.

        Note that ``detect_weight`` does not call this helper; it is available
        for callers that want a thresholded image.

        Args:
            image_path: Path of the image file to read.

        Returns:
            The thresholded grayscale image produced by
            ``cv2.adaptiveThreshold``.

        Raises:
            ValueError: If ``cv2.imread`` cannot read the file.
        """
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError("Could not read image from path")

        # Convert to grayscale
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Gaussian adaptive threshold: block size 11, constant 2, output 0/255
        processed = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 11, 2
        )

        return processed

    def extract_weight_value(self, text: str) -> Optional[float]:
        """Extract a numeric weight from a text fragment.

        Patterns are tried in priority order (decimal with unit, integer with
        unit, bare decimal, bare integer) and the first match found is
        returned. The unit itself is not returned and no unit conversion is
        performed.

        Args:
            text: Text fragment to search, e.g. ``"12.34g"`` or ``"56 kg"``.

        Returns:
            The matched number as a float, or ``None`` if the text contains
            no digits.
        """
        for pattern in self._WEIGHT_PATTERNS:
            match = pattern.search(text)
            if match:
                # Group 1 is always a plain digit string, optionally with one
                # decimal point, so float() cannot fail here.
                return float(match.group(1))
        return None

    @staticmethod
    def _annotate(img: "Image.Image", detections: List[dict]) -> None:
        """Draw each detection's bounding polygon and a text label onto ``img``."""
        draw = ImageDraw.Draw(img)
        for item in detections:
            # Convert bbox corner coordinates to integer (x, y) tuples
            polygon = [(int(x), int(y)) for x, y in item['bbox']]
            draw.polygon(polygon, outline="red", width=2)
            # NOTE(review): the "g" suffix is hard-coded; the matched unit is
            # not tracked, so kg/lb readings are labelled as grams as well.
            label = f"{item['weight']}g (p={item['probability']:.2f})"
            label_x, label_y = polygon[0]
            # Place the label just above the box, clamped so it stays on the
            # canvas when the box touches the top edge.
            draw.text((label_x, max(label_y - 10, 0)), label, fill="red")

    def detect_weight(self, image_path: str) -> Tuple[Optional[float], List[dict], "Image.Image"]:
        """Detect a weight in an image file.

        Args:
            image_path: Path of the image file to analyse.

        Returns:
            A ``(weight, detections, image)`` tuple:

            * ``weight`` - value from the detection with the highest OCR
              probability, or ``None`` if nothing numeric was found.
            * ``detections`` - list of dicts with keys ``'weight'``,
              ``'text'``, ``'probability'`` and ``'bbox'``, sorted by
              descending probability (empty if nothing was found).
            * ``image`` - RGB PIL image with every detection outlined and
              labelled in red.

            Any exception is caught and reported with ``print``; in that case
            the result is ``(None, [], <blank white 100x100 image>)``.
        """
        try:
            # Load the image; the context manager closes the underlying file
            # as soon as the RGB copy has been made.
            with Image.open(image_path) as source:
                img = source.convert("RGB")
            img_cv = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

            # Perform OCR (no thresholding/preprocessing is applied here)
            results = self.reader.readtext(img_cv)

            # Keep every text fragment that contains a number
            detected_weights = []
            for bbox, text, prob in results:
                weight = self.extract_weight_value(text)
                if weight is not None:
                    detected_weights.append({
                        'weight': weight,
                        'text': text,
                        'probability': prob,
                        'bbox': bbox
                    })

            if not detected_weights:
                return None, [], img

            # Most confident detection first; it supplies the returned value
            detected_weights.sort(key=lambda item: item['probability'], reverse=True)
            self._annotate(img, detected_weights)
            return detected_weights[0]['weight'], detected_weights, img

        except Exception as e:
            # Deliberate best-effort boundary: report and return a placeholder
            print(f"Error processing image: {e}")
            return None, [], Image.new("RGB", (100, 100), color="white")