import pytesseract
import numpy as np
import cv2
import re
import logging
from PIL import Image

# Configure the root logger at import time: records at INFO level and above,
# each formatted as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def preprocess_for_ocr(img, *, blur_ksize=5, block_size=11, c=2):
    """Binarize an image for OCR: grayscale, Gaussian blur, adaptive threshold.

    Args:
        img: Image as a NumPy array. Either a colour image in OpenCV's BGR
            channel order, or an already-grayscale 2-D array (used as-is).
            NOTE(review): ``cv2.adaptiveThreshold`` expects 8-bit data, so
            the array is presumably ``uint8`` -- confirm against callers.
        blur_ksize: Side length of the square Gaussian kernel. OpenCV
            requires an odd, positive value.
        block_size: Size of the pixel neighbourhood used to compute each
            local threshold. OpenCV requires an odd value greater than 1.
        c: Constant subtracted from the Gaussian-weighted local mean.

    Returns:
        A single-channel binary image (values 0 or 255) in which pixels
        darker than their local neighbourhood are white, i.e. dark text on
        a light background comes out white on black.
    """
    # A 2-D array has no channel axis, so it is already grayscale; passing
    # it to cvtColor(BGR2GRAY) would raise.
    if img.ndim == 2:
        gray = img
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Smooth sensor noise so the local threshold does not pick up speckle.
    blurred = cv2.GaussianBlur(gray, (blur_ksize, blur_ksize), 0)

    # THRESH_BINARY_INV yields the inverted mask directly; it is equivalent
    # to THRESH_BINARY followed by bitwise_not when the max value is 255,
    # but needs only one pass over the image.
    return cv2.adaptiveThreshold(
        blurred,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        block_size,
        c,
    )

def _normalize_weight_text(raw_text):
    """Reduce raw OCR output to a plain decimal string (possibly empty)."""
    # Keep only digits and dots; this also removes whitespace and newlines.
    text = re.sub(r"[^\d.]", "", raw_text)

    # OCR noise can produce several dots. Drop all but the last one, which
    # is then treated as the decimal point.
    extra_dots = text.count('.') - 1
    if extra_dots > 0:
        text = text.replace('.', '', extra_dots)

    # Normalize dangling decimal points: ".5" -> "0.5" and "5." -> "5".
    if text.startswith('.'):
        text = '0' + text
    if text.endswith('.'):
        text = text[:-1]
    return text


def extract_weight_from_image(pil_img):
    """Extract a weight reading from an image using pytesseract.

    The image is converted to BGR, binarized with ``preprocess_for_ocr``,
    and read by Tesseract as a single text line restricted to digits and
    the decimal point. The recognized text is normalized and accepted only
    if it parses to a value in the range [0.001, 5000].

    Args:
        pil_img: The image to read. A PIL image of any mode is converted to
            RGB first; any other object is handed to ``np.array`` unchanged
            and is then expected to be RGB pixel data.

    Returns:
        A ``(text, confidence)`` tuple. On success ``text`` is the
        normalized decimal string and ``confidence`` is the fixed
        placeholder ``90.0`` (it is not derived from Tesseract). On any
        failure -- nothing recognized, value out of range, or an exception
        during processing -- the result is ``("Not detected", 0.0)``.
    """
    try:
        # Grayscale, palette or 1-bit PIL images do not yield the 3-channel
        # array that COLOR_RGB2BGR needs, so normalize the mode first.
        # Objects without ``convert`` (e.g. NumPy arrays) pass through.
        if hasattr(pil_img, "convert"):
            pil_img = pil_img.convert("RGB")

        # Convert PIL (RGB) to OpenCV (BGR) channel order.
        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

        processed_img = preprocess_for_ocr(img)

        # --psm 7: treat the image as a single line of text; the whitelist
        # limits recognition to characters that can appear in a number.
        config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
        raw_text = pytesseract.image_to_string(processed_img, config=config)

        text = _normalize_weight_text(raw_text)
        if not text:
            return "Not detected", 0.0

        # After normalization the text is digits with at most one interior
        # dot, so float() cannot fail here.
        value = float(text)
        if 0.001 <= value <= 5000:
            return text, 90.0  # Return with fixed confidence

        logging.warning("Detected weight out of range: %s", value)
        return "Not detected", 0.0

    except Exception as e:
        # Deliberate best-effort boundary: any OCR or conversion failure is
        # logged and reported to the caller as "Not detected".
        logging.error("OCR error: %s", e)
        return "Not detected", 0.0