File size: 1,327 Bytes
e58b1c2
975f9c6
 
 
5234a64
e58b1c2
5234a64
9ac49a2
5234a64
570a997
0f29b7c
570a997
 
 
 
 
ded0d50
9ac49a2
 
 
 
570a997
 
 
 
 
 
 
 
 
 
 
 
6ae35d6
301eb4d
b613b80
570a997
301eb4d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pytesseract
import numpy as np
import cv2
import re
import logging
from PIL import Image

# Configure the root logger as an import-time side effect: INFO level and
# above, each record formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def preprocess_image(img):
    """Binarise an image so it can be handed to the OCR engine.

    The input is treated as a BGR colour array (it goes through
    ``cv2.COLOR_BGR2GRAY``). Pipeline, in order:

    1. convert to grayscale,
    2. upscale by a factor of 2 on both axes (linear interpolation),
    3. smooth with a 3x3 Gaussian blur,
    4. apply an inverted adaptive Gaussian threshold
       (block size 11, constant 2, max value 255).

    Returns the thresholded image produced by step 4.
    """
    scale = 2
    kernel = (3, 3)

    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    upscaled = cv2.resize(
        grayscale, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR
    )
    smoothed = cv2.GaussianBlur(upscaled, kernel, 0)

    return cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )

def extract_weight_from_image(pil_img):
    """Run OCR on an image and return the first plausible weight reading.

    Args:
        pil_img: A PIL image, or any object ``np.array`` turns into an RGB
            pixel array. PIL images whose ``mode`` is not ``"RGB"`` are
            converted to RGB before processing.

    Returns:
        A ``(text, confidence)`` tuple. When a number in the range
        ``0 < value <= 5000`` is found, ``text`` is ``str(float(number))``
        (e.g. ``"75.2"``) and ``confidence`` is the fixed value ``90.0``;
        that figure is hard-coded here, not reported by Tesseract.
        Otherwise the result is ``("Not detected", 0.0)``.

    This function does not raise for ordinary errors: any ``Exception`` is
    logged with its traceback and reported as ``("Not detected", 0.0)``.
    """
    try:
        # Grayscale / palette / bilevel PIL images become arrays that the
        # RGB->BGR conversion below rejects, which previously made every such
        # image come back as "Not detected". Normalise them to RGB first.
        # Objects without a ``mode`` attribute (e.g. ndarrays) are left as-is.
        if getattr(pil_img, "mode", "RGB") != "RGB":
            pil_img = pil_img.convert("RGB")

        img = np.array(pil_img)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        processed = preprocess_image(img)

        # Restrict recognition to digits, the decimal point and the letters
        # of the "kg" unit suffix.
        config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.kg'
        raw_text = pytesseract.image_to_string(processed, config=config)
        logging.info("OCR Raw Output: %s", raw_text)

        # NOTE(review): dropping spaces and newlines fuses digits that OCR
        # emitted as separate tokens/lines into one number; presumably this
        # is meant to heal split readings -- confirm against real samples.
        cleaned = raw_text.replace(" ", "").replace("\n", "")

        # Raw string: "\d" and "\." are invalid escapes in a normal literal.
        match = re.search(r"(\d+\.?\d*)", cleaned)
        if match:
            value = float(match.group(1))
            # Only the first number found is considered; out-of-range
            # readings are treated as a failed detection.
            if 0 < value <= 5000:
                return str(value), 90.0
        return "Not detected", 0.0

    except Exception as e:
        # Deliberate best-effort boundary: callers always get a tuple back.
        # logging.exception keeps the traceback that logging.error dropped.
        logging.exception("OCR error: %s", e)
        return "Not detected", 0.0