File size: 802 Bytes
3165179
 
 
65ef5f8
 
 
1cb8b90
65ef5f8
3165179
65ef5f8
1cb8b90
65ef5f8
1cb8b90
 
65ef5f8
1cb8b90
 
 
65ef5f8
1cb8b90
3165179
1cb8b90
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import cv2
import pytesseract
import numpy as np
from PIL import Image

def extract_weight(pil_image: Image.Image) -> str:
    """Run OCR on an image and return the numeric reading found in it.

    The image is converted to grayscale, upscaled 2x, blurred and binarised
    with Otsu's threshold before being handed to Tesseract, which is
    restricted to digits and the decimal point.

    Args:
        pil_image: The PIL image to read.

    Returns:
        The recognised characters as a string made of digits and at most
        one decimal point, or the message ``"No valid weight detected"``
        when the OCR output contains no digit or is not a well-formed
        number (for example ``"."`` or ``"1.2.3"``).
    """
    allowed_chars = "0123456789."

    # Convert to OpenCV format (NumPy array, single grayscale channel).
    rgb = np.array(pil_image.convert("RGB"))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

    # Enhance image for OCR: upscale, smooth, then binarise.
    gray = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(
        blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )

    # OCR with config: --psm 7 selects Tesseract's single-line mode and the
    # whitelist limits recognition to digits and '.'.
    config = "--psm 7 -c tessedit_char_whitelist=0123456789."
    text = pytesseract.image_to_string(thresh, config=config)

    # Keep only digits and decimal points; this also drops the whitespace
    # and newline that surround the OCR output.
    weight = "".join(ch for ch in text if ch in allowed_chars)

    # A non-empty string is not enough: stray dots ("." or "..") or several
    # decimal points ("1.2.3") are OCR noise, not a usable number.
    has_digit = bool(weight.replace(".", ""))
    if not has_digit or weight.count(".") > 1:
        return "No valid weight detected"

    return weight