File size: 1,939 Bytes
e58b1c2
975f9c6
 
 
5234a64
e58b1c2
5234a64
8254c9e
9ac49a2
5234a64
301eb4d
 
0f29b7c
301eb4d
d373620
301eb4d
956dff8
301eb4d
 
 
 
956dff8
ded0d50
301eb4d
 
 
975f9c6
9ac49a2
301eb4d
9ac49a2
301eb4d
9ac49a2
 
3137c41
301eb4d
 
9ac49a2
301eb4d
 
9ac49a2
301eb4d
 
b613b80
301eb4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ae35d6
301eb4d
b613b80
301eb4d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pytesseract
import numpy as np
import cv2
import re
import logging
from PIL import Image

# Set up logging: configure the root logger when this module is imported so
# that INFO-and-above records are emitted as "<time> - <level> - <message>".
# NOTE: logging.basicConfig() does nothing if the root logger already has
# handlers, so an application that configured logging earlier keeps its setup.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def preprocess_for_ocr(img):
    """Apply grayscale, blur, and threshold to prepare image for OCR.

    Args:
        img: Image array to preprocess. Accepted layouts are 3-channel BGR,
            4-channel BGRA, or an already single-channel 2-D array.
            NOTE(review): cv2.adaptiveThreshold expects 8-bit data, so the
            array is presumably uint8 -- confirm against callers.

    Returns:
        The binary image produced by adaptive thresholding, inverted with
        ``cv2.bitwise_not``.
    """
    # Pick the grayscale conversion from the channel layout; a blanket
    # BGR2GRAY raises on 2-D (already gray) and 4-channel inputs.
    ndim = getattr(img, "ndim", None)
    if ndim == 2:
        gray = img
    elif ndim == 3 and img.shape[2] == 4:
        gray = cv2.cvtColor(img, cv2.COLOR_BGRA2GRAY)
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Smooth with a 5x5 Gaussian kernel before thresholding.
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)

    # Adaptive threshold: Gaussian-weighted 11x11 neighbourhood, constant 2
    # subtracted from the local mean, output values 0 or 255.
    thresh = cv2.adaptiveThreshold(
        blurred, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11, 2
    )

    # Invert the binary image. The original intent is "text white on black",
    # which holds when the input has dark text on a light background --
    # TODO confirm this polarity is what the OCR step works best with.
    return cv2.bitwise_not(thresh)

# Inclusive bounds a parsed reading must fall within to be accepted.
_MIN_WEIGHT = 0.001
_MAX_WEIGHT = 5000

# Placeholder confidence returned with every accepted reading; it is a fixed
# value, not something measured from the OCR engine.
_FIXED_CONFIDENCE = 90.0

# Result returned whenever no plausible reading could be extracted.
_NOT_DETECTED = ("Not detected", 0.0)


def _normalize_reading(raw_text):
    """Reduce raw OCR output to a string of digits with at most one dot.

    Every character other than a digit or '.' is dropped (this also removes
    whitespace and newlines). If several dots remain, only the last one is
    kept. A leading dot gets a '0' prefix. The result may be empty.
    """
    text = re.sub(r"[^\d.]", "", raw_text)

    # Drop the leftmost dots so that exactly one (the last) survives.
    extra_dots = text.count('.') - 1
    if extra_dots > 0:
        text = text.replace('.', '', extra_dots)

    if text.startswith('.'):
        text = '0' + text
    return text


def extract_weight_from_image(pil_img):
    """Extract weight reading from an image using pytesseract.

    Args:
        pil_img: Image to read. A PIL image in a non-RGB mode (grayscale,
            RGBA, palette, ...) is converted to RGB first, because the
            RGB->BGR conversion below only accepts 3-channel data. Objects
            without a ``mode`` attribute (e.g. an RGB numpy array) are handed
            to ``np.array`` unchanged.

    Returns:
        tuple: ``(text, 90.0)`` where ``text`` is the cleaned reading string,
        when it parses to a value within [0.001, 5000]; otherwise
        ``("Not detected", 0.0)``. The 90.0 confidence is a fixed value.

    This function does not raise: any exception during conversion, OCR or
    parsing is logged and reported as ``("Not detected", 0.0)``.
    """
    try:
        # Normalise the colour mode so cvtColor always sees three channels.
        if getattr(pil_img, "mode", "RGB") != "RGB":
            pil_img = pil_img.convert("RGB")

        # Convert PIL (RGB) to OpenCV (BGR) and preprocess.
        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        processed_img = preprocess_for_ocr(img)

        # Tesseract config: default engine (--oem 3), treat the image as a
        # single text line (--psm 7), and restrict output to digits and '.'.
        config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
        raw_text = pytesseract.image_to_string(processed_img, config=config)

        text = _normalize_reading(raw_text)
        if not text:
            return _NOT_DETECTED

        value = float(text)
        if _MIN_WEIGHT <= value <= _MAX_WEIGHT:
            return text, _FIXED_CONFIDENCE

        logging.warning("Detected weight out of range: %s", value)
        return _NOT_DETECTED

    except Exception as e:
        # Deliberate catch-all: callers rely on getting the "Not detected"
        # result instead of an exception. logging.exception keeps the
        # traceback so failures remain diagnosable.
        logging.exception("OCR error: %s", e)
        return _NOT_DETECTED