File size: 2,121 Bytes
e58b1c2
975f9c6
 
 
5234a64
e58b1c2
5234a64
b18d0cd
9ac49a2
5234a64
b18d0cd
 
 
0f29b7c
d373620
b18d0cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ded0d50
975f9c6
9ac49a2
b18d0cd
9ac49a2
 
 
3137c41
b18d0cd
b613b80
b18d0cd
 
 
 
 
301eb4d
b18d0cd
 
 
 
 
 
 
 
 
301eb4d
b18d0cd
 
 
 
301eb4d
6ae35d6
301eb4d
b613b80
b18d0cd
301eb4d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pytesseract
import numpy as np
import cv2
import re
import logging
from PIL import Image

# Configure the root logger at import time: INFO level, with each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)


def preprocess_strong(img):
    """Prepare a BGR image for OCR: grayscale, 2x upscale, CLAHE, sharpen.

    Args:
        img: Image array accepted by ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``.

    Returns:
        The single-channel image after upscaling, contrast enhancement and
        sharpening.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Double both dimensions; cv2.resize takes the target size as
    # (width, height), the reverse of the array's (rows, cols) shape.
    height, width = grayscale.shape
    upscaled = cv2.resize(
        grayscale, (width * 2, height * 2), interpolation=cv2.INTER_CUBIC
    )

    # Contrast-limited adaptive histogram equalisation on an 8x8 tile grid.
    equalizer = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    contrasted = equalizer.apply(upscaled)

    # 3x3 sharpening kernel: centre weight 5, the four direct neighbours -1.
    sharpen_kernel = np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ])
    return cv2.filter2D(contrasted, -1, sharpen_kernel)


# Tesseract configurations tried in order. Both restrict the recognised
# characters to digits and '.', and differ only in page segmentation mode.
_OCR_CONFIGS = (
    r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.',
    r'--oem 3 --psm 11 -c tessedit_char_whitelist=0123456789.',
)

# Anything that is neither a digit nor a decimal point is discarded.
_NON_NUMERIC = re.compile(r"[^\d.]")

# Inclusive range of values accepted as a plausible reading.
_MIN_WEIGHT = 0.001
_MAX_WEIGHT = 5000


def _parse_weight_text(raw_text):
    """Return the number parsed from raw OCR text, or None if unusable."""
    # Removes whitespace and newlines along with every other stray character.
    cleaned = _NON_NUMERIC.sub("", raw_text)

    # With several decimal points, drop the leading ones and keep the last.
    surplus_dots = cleaned.count('.') - 1
    if surplus_dots > 0:
        cleaned = cleaned.replace('.', '', surplus_dots)
    if cleaned.startswith('.'):
        cleaned = '0' + cleaned
    if not cleaned:
        return None

    try:
        value = float(cleaned)
    except ValueError:
        # Unparseable text from one OCR pass must not abort the other passes.
        return None

    if _MIN_WEIGHT <= value <= _MAX_WEIGHT:
        return value
    return None


def extract_weight_from_image(pil_img):
    """Extract a weight reading from an image using several Tesseract passes.

    The image is normalised to RGB, converted to BGR, run through
    ``preprocess_strong`` and then OCR'd once per entry in ``_OCR_CONFIGS``.
    The first pass whose text parses to a number within
    ``[_MIN_WEIGHT, _MAX_WEIGHT]`` wins.

    Args:
        pil_img: The image to read. Objects exposing ``convert`` (PIL images)
            are converted to RGB first; anything else is handed to
            ``np.array`` unchanged.

    Returns:
        A ``(text, confidence)`` tuple. On success ``text`` is the value
        rounded to two decimals, as a string, and ``confidence`` is the fixed
        value ``90.0`` (it is not derived from Tesseract). Otherwise the
        result is ``("Not detected", 0.0)``.

    This function is a best-effort boundary: any exception is logged with its
    traceback and reported as ``("Not detected", 0.0)`` rather than raised.
    """
    try:
        # np.array() of a grayscale, palette or 1-bit PIL image is not an
        # H x W x 3 array, which makes the RGB->BGR conversion below raise.
        # Normalising the mode first lets those images be processed too.
        if hasattr(pil_img, "convert"):
            pil_img = pil_img.convert("RGB")

        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        processed = preprocess_strong(img)

        for config in _OCR_CONFIGS:
            raw_text = pytesseract.image_to_string(processed, config=config)
            logging.info("[Tesseract Output %s] Raw text: %s", config, raw_text)
            value = _parse_weight_text(raw_text)
            if value is not None:
                return str(round(value, 2)), 90.0

        return "Not detected", 0.0

    except Exception as e:
        logging.exception("OCR failed: %s", e)
        return "Not detected", 0.0