"""OCR helpers for reading the weight shown on a digital scale display."""

import logging
import re

import cv2
import numpy as np
import pytesseract
from PIL import Image  # not used in this module; kept so existing importers keep working

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Tesseract configurations tried in order; the first one that yields a
# plausible weight wins.
_TESSERACT_CONFIGS = (
    r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.',  # Single line
    r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.',  # Block of text
)

_NON_NUMERIC = re.compile(r"[^\d\.]")
_NUMBER = re.compile(r"\d*\.?\d*")

# Accepted weight range (inclusive) for a reading to count as valid.
_MIN_WEIGHT = 0.001
_MAX_WEIGHT = 5000


def _to_grayscale(pixels):
    """Return a single-channel view of an RGB(A) or already-gray array."""
    if pixels.ndim == 2:
        # Already single channel (e.g. a PIL "L" image): nothing to convert.
        return pixels
    if pixels.shape[2] == 4:
        return cv2.cvtColor(pixels, cv2.COLOR_RGBA2GRAY)
    return cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)


def _normalize_weight_text(raw):
    """Reduce raw OCR output to a canonical decimal string, or None.

    Everything except digits and dots is dropped, only the last dot is kept
    as the decimal point, and redundant leading zeros are removed while
    keeping one zero in front of a bare fraction ("0.5", never ".5").
    """
    text = _NON_NUMERIC.sub("", raw)
    surplus_dots = text.count('.') - 1
    if surplus_dots > 0:
        # str.replace with a count removes the left-most dots first, so the
        # right-most dot survives as the decimal separator.
        text = text.replace('.', '', surplus_dots)
    text = text.strip('.')
    if not text or not _NUMBER.fullmatch(text):
        return None
    text = text.lstrip('0') or '0'
    if text.startswith('.'):
        text = '0' + text
    return text


def preprocess_image(img):
    """Preprocess image for robust OCR.

    Args:
        img: A PIL image or array-like in RGB channel order (RGBA and
            single-channel grayscale inputs are also accepted).

    Returns:
        The image after CLAHE contrast enhancement, inverted adaptive
        thresholding and a 3x3 morphological opening. If any step fails
        the error is logged and the unprocessed image is returned instead
        (as an array when the array conversion itself succeeded).
    """
    fallback = img
    try:
        pixels = np.array(img)
        fallback = pixels

        gray = _to_grayscale(pixels)

        # Estimate brightness for adaptive processing: darker images get a
        # higher CLAHE clip limit.
        brightness = np.mean(gray)
        clahe_clip = 4.0 if brightness < 100 else 2.0
        clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Block size scales with image height, is always odd, and is
        # clamped to the range [11, 31].
        block_size = max(11, min(31, int(gray.shape[0] / 20) * 2 + 1))
        thresh = cv2.adaptiveThreshold(
            enhanced,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            block_size,
            2,
        )

        # Noise reduction
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        return cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
    except Exception as e:
        # Best effort: OCR on the raw image is preferred over failing outright.
        logging.error("Preprocessing failed: %s", e)
        return fallback


def extract_weight_from_image(pil_img):
    """Extract weight from any digital scale image.

    Args:
        pil_img: A PIL image (RGB channel order) of the scale display.

    Returns:
        A ``(text, confidence)`` tuple. ``text`` is the detected weight as
        a decimal string and ``confidence`` is a fixed heuristic score
        (95.0 when the reading has at least three digits after dropping
        leading zeros, otherwise 90.0); it is not a value reported by
        Tesseract. When nothing valid is found, or on any error,
        ``("Not detected", 0.0)`` is returned.
    """
    try:
        # preprocess_image expects RGB input and does the single colour
        # conversion itself, so the caller's image is passed straight through.
        thresh = preprocess_image(pil_img)

        for config in _TESSERACT_CONFIGS:
            raw = pytesseract.image_to_string(thresh, config=config)
            logging.info(f"Tesseract raw output (config {config}): {raw}")

            text = _normalize_weight_text(raw)
            if text is None:
                continue

            # Leading zeros do not count towards the digit total.
            digit_count = len(text.lstrip('0').replace('.', ''))
            confidence = 95.0 if digit_count >= 3 else 90.0
            try:
                weight = float(text)
            except ValueError:
                logging.warning(f"Invalid weight format: {text}")
                continue
            if _MIN_WEIGHT <= weight <= _MAX_WEIGHT:
                logging.info(f"Detected weight: {text} kg, Confidence: {confidence:.2f}%")
                return text, confidence

        logging.info("No valid weight detected.")
        return "Not detected", 0.0
    except Exception as e:
        logging.error("Weight extraction failed: %s", e)
        return "Not detected", 0.0