logger1

Running

File size: 4,214 Bytes

import easyocr
import numpy as np
import cv2
import re
from PIL import Image
import logging
import sys

# Configure the root logger at import time: INFO level, timestamped
# "time - level - message" records, emitted through a stdout stream handler.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.StreamHandler(sys.stdout)])

# Build the English-only EasyOCR reader once at import time so that every
# call in this module shares it instead of constructing a new reader per image.
reader = easyocr.Reader(['en'], gpu=False)  # gpu=False: run on CPU (e.g. CPU-only hosts such as Hugging Face Spaces)

def preprocess_image(img):
    """Preprocess an image for OCR with EasyOCR, tuned for weight displays.

    The image is converted to grayscale, contrast-enhanced with CLAHE and
    binarised with an inverted adaptive Gaussian threshold.

    Args:
        img: A PIL image, or anything ``np.array`` converts to a 2-D
            (already grayscale) array or a colour array in RGB channel order.

    Returns:
        The thresholded single-channel array. If contrast enhancement or
        thresholding fails, the plain grayscale array is returned instead.

    Raises:
        Exception: The original error is re-raised (after being logged) when
            the failure happens before a grayscale image exists, because
            there is nothing meaningful to fall back on.
    """
    # Stays None until grayscale conversion succeeds, so the error handler can
    # tell "no fallback available" apart from "fallback available".
    gray = None
    try:
        pixels = np.array(img)

        if pixels.ndim == 2:
            # Already single-channel (e.g. PIL mode "L"); a colour
            # conversion would reject it.
            gray = pixels
        else:
            # PIL delivers RGB order, so convert straight to grayscale.
            gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

        # Enhance contrast for diverse lighting conditions
        clahe = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Adaptive threshold window scales with image height; it is always
        # odd and clamped to the range [11, 31].
        block_size = max(11, min(31, int(gray.shape[0] / 15) * 2 + 1))
        thresh = cv2.adaptiveThreshold(
            enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, block_size, 2
        )

        return thresh  # EasyOCR handles further processing
    except Exception as e:
        logging.error(f"Preprocessing failed: {str(e)}")
        if gray is None:
            # Nothing usable was produced; surface the real error instead of
            # failing on an unbound fallback variable.
            raise
        return gray

# Optionally signed number using "." or "," as the decimal mark, optionally
# followed (with or without whitespace) by a "kg" or "g" unit that is not just
# the first letters of a longer word.
_WEIGHT_PATTERN = re.compile(r'(-?\d*[.,]?\d+)(?:\s*(kg|g)(?![a-z]))?')


def _parse_weight_text(text):
    """Return ``(weight_str, unit)`` for the first number in *text*, else ``None``.

    *text* is expected to be lower-cased already. The number is normalised to
    use "." as the decimal mark, with redundant leading zeros removed but one
    zero kept in front of a bare decimal point. The unit defaults to "g" when
    none is recognised. ``None`` is returned when no number is found or when
    the value lies outside the accepted range of -5000 to 5000.
    """
    match = _WEIGHT_PATTERN.search(text)
    if match is None:
        return None

    number = match.group(1).replace(',', '.')  # Decimal comma, e.g. "68,0"
    unit = match.group(2) or "g"  # Default to grams if no unit

    sign = '-' if number.startswith('-') else ''
    digits = number.lstrip('-').lstrip('0')
    if not digits:
        digits = '0'
    elif digits.startswith('.'):
        digits = '0' + digits
    weight_str = sign + digits

    try:
        weight = float(weight_str)
    except ValueError:
        logging.warning(f"Invalid weight format: {weight_str}")
        return None

    if not -5000 <= weight <= 5000:
        return None
    return weight_str, unit


def extract_weight_from_image(pil_img):
    """Extract weight and unit from a digital scale image using EasyOCR.

    Args:
        pil_img: The image to read; it is passed to ``preprocess_image``.

    Returns:
        A ``(weight_str, confidence, unit)`` tuple. ``weight_str`` is the
        normalised number as text, ``confidence`` is the lowest confidence
        among the non-empty OCR detections scaled to a percentage, and
        ``unit`` is "kg" or "g" ("g" when no unit was read). When nothing
        valid is found, or any step raises, the result is
        ``("Not detected", 0.0, "")``.
    """
    try:
        processed = preprocess_image(pil_img)

        # Use EasyOCR to detect text
        results = reader.readtext(processed, detail=1, paragraph=False)
        logging.info(f"EasyOCR raw output: {results}")

        # Combine all detected fragments into one normalised, lower-case line
        text = " ".join(result[1] for result in results)
        text = re.sub(r'\s+', ' ', text.strip().lower())

        parsed = _parse_weight_text(text)
        if parsed is not None:
            weight_str, unit = parsed
            # Convert EasyOCR confidence (0-1) to percentage; the weakest
            # detection is reported.
            confidence = min(
                (result[2] for result in results if result[1]), default=0.0
            ) * 100
            logging.info(f"Detected weight: {float(weight_str)} {unit}, Confidence: {confidence:.2f}%")
            return weight_str, confidence, unit

        logging.info("No valid weight detected.")
        return "Not detected", 0.0, ""
    except Exception as e:
        # Best-effort boundary: callers always get the "Not detected" tuple.
        logging.error(f"Weight extraction failed: {str(e)}")
        return "Not detected", 0.0, ""