import pytesseract
import numpy as np
import cv2
import re
from PIL import Image
import logging

# Configure root logging at import time: records at INFO level and above,
# each formatted as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def preprocess_image(img):
    """Binarise an image so the digits stand out for OCR.

    Pipeline: grayscale -> CLAHE contrast enhancement -> inverted Gaussian
    adaptive threshold -> 3x3 morphological opening to remove speckles.

    Args:
        img: A PIL image or anything ``np.array`` accepts. Input with a
            channel axis is interpreted in RGB(A) channel order; 2-D input
            is taken to be grayscale already and used as is.

    Returns:
        The thresholded single-channel array on success. If any step fails,
        the error is logged and the input is returned unprocessed: as an
        array in its original channel order, or as the original object if
        even the array conversion failed.
    """
    fallback = img
    try:
        pixels = np.array(img)
        fallback = pixels

        # Colour conversion needs a channel axis; a 2-D array is already
        # grayscale and would make cvtColor raise.
        if pixels.ndim == 2:
            gray = pixels
        else:
            gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)

        # Dark images get a stronger contrast boost.
        brightness = np.mean(gray)
        clahe_clip = 4.0 if brightness < 100 else 2.0
        clahe = cv2.createCLAHE(clipLimit=clahe_clip, tileGridSize=(8, 8))
        enhanced = clahe.apply(gray)

        # Neighbourhood size scales with image height, clamped to [11, 31].
        # "* 2 + 1" keeps it odd, as adaptiveThreshold requires.
        block_size = max(11, min(31, int(gray.shape[0] / 20) * 2 + 1))
        thresh = cv2.adaptiveThreshold(
            enhanced,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV,
            block_size,
            2,
        )

        # Opening with a small kernel removes isolated noise pixels.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        return cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
    except Exception as e:
        # Best effort: OCR on an unprocessed image beats failing outright.
        logging.error("Preprocessing failed: %s", e)
        return fallback

# Tesseract configurations tried in order; both restrict output to digits and '.'.
_TESSERACT_CONFIGS = (
    r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.',  # Single line
    r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.',  # Block of text
)


def _clean_ocr_number(raw):
    """Reduce raw OCR text to digits with at most one interior '.', or None if empty."""
    cleaned = re.sub(r"[^\d\.]", "", raw)
    surplus_dots = cleaned.count('.') - 1
    if surplus_dots > 0:
        # Keep only the last dot; earlier ones are treated as OCR noise.
        cleaned = cleaned.replace('.', '', surplus_dots)
    cleaned = cleaned.strip('.')
    return cleaned or None


def extract_weight_from_image(pil_img):
    """Read the weight shown in an image of a digital scale.

    The image is preprocessed and passed to Tesseract under each
    configuration in turn. The first reading that parses to a value in
    [0.001, 5000] is returned.

    Args:
        pil_img: The image to read, as a PIL image (RGB channel order).

    Returns:
        A ``(weight_text, confidence)`` tuple. ``weight_text`` is the decimal
        string with redundant leading zeros removed (e.g. ``"0.5"``,
        ``"12.30"``). ``confidence`` is a fixed heuristic, not a Tesseract
        score: 95.0 when the reading has at least three digits after
        leading-zero removal, otherwise 90.0. Returns
        ``("Not detected", 0.0)`` when no configuration yields a valid
        weight or any error occurs.
    """
    try:
        # Pass the image through untouched: preprocess_image does its own
        # RGB conversion, and converting here as well would swap the
        # channels twice and skew the grayscale weights.
        thresh = preprocess_image(pil_img)

        for config in _TESSERACT_CONFIGS:
            raw = pytesseract.image_to_string(thresh, config=config)
            logging.info("Tesseract raw output (config %s): %s", config, raw)

            cleaned = _clean_ocr_number(raw)
            if cleaned is None:
                continue

            trimmed = cleaned.lstrip('0') or '0'
            confidence = 95.0 if len(trimmed.replace('.', '')) >= 3 else 90.0
            # Stripping zeros from "0.5" leaves ".5"; restore the single
            # leading zero so the returned text is a conventional decimal.
            text = '0' + trimmed if trimmed.startswith('.') else trimmed

            if 0.001 <= float(text) <= 5000:
                logging.info("Detected weight: %s kg, Confidence: %.2f%%", text, confidence)
                return text, confidence

        logging.info("No valid weight detected.")
        return "Not detected", 0.0
    except Exception as e:
        logging.error("Weight extraction failed: %s", e)
        return "Not detected", 0.0