File size: 1,299 Bytes
5b73d24
65ed4c1
5b73d24
8fe1b94
 
65ed4c1
363a646
65ed4c1
8fe1b94
363a646
65ed4c1
701d11a
363a646
701d11a
8fe1b94
 
 
 
 
701d11a
8fe1b94
 
 
 
65ed4c1
8fe1b94
 
4a07e0e
8fe1b94
 
 
 
 
 
65ed4c1
 
 
 
8fe1b94
65ed4c1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import pytesseract
import numpy as np
import re
import cv2
from PIL import Image

# A weight reading is either up to four integer digits followed by one or two
# decimals (e.g. 5.25, 52.35, 002.50) or a bare integer of two to six digits.
# A single leading digit is accepted only when decimals follow, so "5.25" is
# reported whole instead of as its fractional part "25"; requiring a digit
# after the dot keeps a dangling "." out of the result.
_WEIGHT_PATTERN = re.compile(r"\b(?:\d{1,4}\.\d{1,2}|\d{2,6})\b")

# Confidence returned with every successful match. This is a fixed constant,
# not a score obtained from Tesseract.
_MATCH_CONFIDENCE = 95.0


def _prepare_for_ocr(pil_img):
    """Return an upscaled, blurred, inverted-binary array built from *pil_img*."""
    # cv2.COLOR_RGB2GRAY needs a 3- or 4-channel array, but grayscale ("L"),
    # palette ("P") and bilevel ("1") PIL images convert to 2-D arrays.
    # Normalise those to RGB first; inputs without a ``mode`` attribute
    # (e.g. an array passed directly) are left untouched.
    mode = getattr(pil_img, "mode", None)
    if mode is not None and mode not in ("RGB", "RGBA"):
        pil_img = pil_img.convert("RGB")

    gray = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2GRAY)

    # Double the resolution before OCR.
    enlarged = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)

    # Smooth with a 5x5 Gaussian kernel before thresholding.
    blurred = cv2.GaussianBlur(enlarged, (5, 5), 0)

    # Mean adaptive threshold (block size 11, constant 2) with inverted output.
    return cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )


def extract_weight_from_image(pil_img):
    """Read a numeric weight from an image using Tesseract OCR.

    The image is converted to grayscale, upscaled 2x, blurred and adaptively
    thresholded, then passed to Tesseract restricted to digits and ".".
    The first number-like token in the OCR output is returned.

    Args:
        pil_img: The image to read. PIL images in any mode are accepted;
            non-RGB/RGBA modes are converted to RGB before processing.

    Returns:
        A ``(text, confidence)`` tuple:

        * ``(<matched number as str>, 95.0)`` when a weight is found
          (the confidence is a fixed placeholder value),
        * ``("No weight detected", 0.0)`` when the OCR text has no match,
        * ``("Error: <message>", 0.0)`` when any step raises.

    This function never raises; failures are reported through the returned
    string. The raw OCR text is printed to stdout.
    """
    try:
        binary = _prepare_for_ocr(pil_img)

        # --psm 7 treats the image as a single text line; the whitelist
        # restricts recognised characters to digits and the decimal point.
        config = "--psm 7 -c tessedit_char_whitelist=0123456789."

        ocr_text = pytesseract.image_to_string(binary, config=config)
        print("OCR Text:", ocr_text)

        match = _WEIGHT_PATTERN.search(ocr_text)
        if match:
            return match.group(), _MATCH_CONFIDENCE
        return "No weight detected", 0.0

    except Exception as e:
        # Best-effort boundary: callers receive the failure as a string.
        return f"Error: {str(e)}", 0.0