File size: 2,150 Bytes
5d670ae
da9f292
5d670ae
da9f292
 
5d670ae
da9f292
acddb2f
c925f8d
005d086
 
 
 
 
 
acddb2f
 
 
6257859
acddb2f
 
3ca006e
c925f8d
acddb2f
c925f8d
acddb2f
363a646
65ed4c1
363a646
acddb2f
da9f292
acddb2f
3ca006e
 
acddb2f
ee1d691
da9f292
5d670ae
6257859
 
 
 
 
 
 
005d086
6257859
477d4fe
6257859
c925f8d
acddb2f
103f82b
ee1d691
acddb2f
103f82b
6257859
ee1d691
6257859
 
 
 
acddb2f
8fe1b94
65ed4c1
2132698
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import easyocr
import numpy as np
import cv2
import re

# Module-level EasyOCR reader shared by the functions below; built once when
# the module is imported, configured for English text with GPU use disabled.
reader = easyocr.Reader(['en'], gpu=False)

def enhance_image(img):
    """Prepare an image array for OCR.

    Steps: downscale large images, convert to grayscale, denoise, sharpen,
    then equalize local contrast with CLAHE.

    Args:
        img: Image as a NumPy array. May be 2-D grayscale, ``(H, W, 1)``,
            ``(H, W, 3)`` or ``(H, W, 4)``; colour data is treated as
            RGB / RGBA channel order.

    Returns:
        The enhanced single-channel image as a NumPy array.
    """
    # Downscale so the longest side is at most max_dim pixels.
    max_dim = 1000
    height, width = img.shape[:2]
    longest_side = max(height, width)
    if longest_side > max_dim:
        scale = max_dim / longest_side
        img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

    # Choose the grayscale conversion from the channel count. Calling
    # cvtColor(..., COLOR_RGB2GRAY) on an image that is already
    # single-channel raises, so those inputs are passed through instead.
    if img.ndim == 2:
        gray = img
    elif img.shape[2] == 1:
        gray = np.ascontiguousarray(img[:, :, 0])
    elif img.shape[2] == 4:
        gray = cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    gray = cv2.fastNlMeansDenoising(gray, h=15)

    # 3x3 sharpening kernel: centre weight 5, the four direct neighbours -1.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharp = cv2.filter2D(gray, -1, kernel)

    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrast = clahe.apply(sharp)

    return contrast

# Letters that OCR commonly produces in place of digits. The text is
# lower-cased before translation, so only lower-case keys are needed.
_MISREAD_FIXES = str.maketrans({"o": "0", "s": "5", "g": "9"})


def _normalize_weight_text(text):
    """Reduce one OCR text fragment to digits and dots (possibly empty)."""
    cleaned = text.lower().strip().replace(",", ".")  # decimal comma -> dot
    # Remove the unit BEFORE the letter->digit fixes; otherwise the "g" of
    # "kg" is turned into a 9 and ends up appended to the number.
    cleaned = re.sub(r"kgs?", "", cleaned)
    cleaned = cleaned.translate(_MISREAD_FIXES)
    return re.sub(r"[^\d.]", "", cleaned)  # keep only digits and dots


def extract_weight_from_image(pil_img):
    """Read a weight value from an image using OCR.

    The image is converted to a NumPy array, enhanced with
    ``enhance_image`` and passed to the module-level ``reader``. Every
    recognised text fragment is normalised; fragments that look like a
    2-4 digit number with up to two decimals are candidates, and the
    candidate with the highest OCR confidence wins.

    Args:
        pil_img: The image to read; anything ``np.array`` can convert
            (presumably a PIL image, going by the parameter name).

    Returns:
        A ``(weight, confidence_percent, raw_text)`` tuple:
        * the weight as a string, ``"Not detected"`` when no fragment
          qualifies, or ``"Error: <message>"`` when processing fails;
        * the confidence as a percentage rounded to two decimals
          (``0.0`` when nothing was detected or on error);
        * all recognised fragments joined by newlines, or ``"OCR failed"``
          on error.
    """
    try:
        img = np.array(pil_img)
        enhanced = enhance_image(img)

        results = reader.readtext(enhanced)
        print("DEBUG OCR RESULTS:", results)

        ocr_texts = [text for _, text, _ in results]
        weight_candidates = []

        for _, text, conf in results:
            cleaned = _normalize_weight_text(text)

            # Match: 2 to 4 digits, optionally followed by 1-2 decimals.
            if re.fullmatch(r"\d{2,4}(\.\d{1,2})?", cleaned):
                weight_candidates.append((cleaned, conf))

        if not weight_candidates:
            return "Not detected", 0.0, "\n".join(ocr_texts)

        # Highest-confidence candidate; on ties the earliest one is kept.
        best_weight, best_conf = max(weight_candidates, key=lambda cand: cand[1])

        # Remove leading zeros, but keep one in front of a decimal point
        # ("00.5" -> "0.5") and never return an empty string.
        best_weight = best_weight.lstrip('0') or '0'
        if best_weight.startswith('.'):
            best_weight = '0' + best_weight

        return best_weight, round(best_conf * 100, 2), "\n".join(ocr_texts)

    except Exception as e:
        # Deliberate best-effort boundary: callers get an error tuple
        # instead of an exception.
        return f"Error: {str(e)}", 0.0, "OCR failed"