File size: 2,513 Bytes
5d670ae
da9f292
5d670ae
da9f292
 
61b752b
5d670ae
da9f292
363a646
65ed4c1
363a646
da9f292
61b752b
ddf8948
 
 
 
 
 
61b752b
ddf8948
3ca006e
 
ddf8948
ee1d691
ddf8948
 
 
61b752b
ddf8948
 
 
61b752b
ddf8948
61b752b
 
 
ddf8948
 
477d4fe
ddf8948
 
61b752b
 
ddf8948
 
 
61b752b
ddf8948
acddb2f
103f82b
61b752b
ddf8948
 
 
 
 
61b752b
ddf8948
61b752b
ddf8948
 
 
 
 
 
103f82b
ddf8948
8fe1b94
65ed4c1
2132698
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import easyocr
import numpy as np
import cv2
import re

# Module-level EasyOCR reader for English text, created once at import time
# with GPU disabled; extract_weight_from_image() calls it for every image.
reader = easyocr.Reader(['en'], gpu=False)

# Letter/digit confusions corrected after lower-casing (so only lower-case
# keys are needed); a comma is treated as a decimal point.
_OCR_DIGIT_FIXES = str.maketrans({",": ".", "o": "0", "s": "5", "g": "9"})
# Unit suffix to drop BEFORE the digit fixes, otherwise "kg" turns into "k9".
_UNIT_SUFFIX = re.compile(r"kgs?")
_NON_NUMERIC = re.compile(r"[^0-9.]")
# Proper weight format: 75.02, 97.2, 105
_WEIGHT_FORMAT = re.compile(r"\d{2,4}(\.\d{1,3})?")


def _split_ocr_entry(entry):
    """Return ``(text, confidence)`` from one OCR result entry."""
    # EasyOCR's readtext documents flat (bbox, text, confidence) entries; the
    # nested (bbox, (text, confidence)) shape is still accepted for callers
    # that feed results in that form.
    if len(entry) == 3:
        _, text, conf = entry
    else:
        _, (text, conf) = entry
    return text, conf


def _clean_ocr_text(text):
    """Reduce raw OCR text to the digits and dots it most likely represents."""
    cleaned = text.lower().strip()
    cleaned = _UNIT_SUFFIX.sub("", cleaned)
    cleaned = cleaned.translate(_OCR_DIGIT_FIXES)
    return _NON_NUMERIC.sub("", cleaned)


def _strip_leading_zeros(weight):
    """Drop redundant leading zeros (and a dangling trailing dot) from *weight*."""
    int_part, _, dec_part = weight.partition(".")
    int_part = int_part.lstrip("0") or "0"
    return f"{int_part}.{dec_part}" if dec_part else int_part


def extract_weight_from_image(pil_img):
    """Read a weight value from an image using the module-level OCR reader.

    Args:
        pil_img: Image object convertible with ``np.array`` (presumably a PIL
            image, going by the parameter name).

    Returns:
        A ``(weight, confidence_percent, debug_text)`` tuple:

        * ``weight`` - the detected number as a string, ``"Not detected"``
          when no numeric text was found, or ``"Error: <message>"`` when any
          exception was raised.
        * ``confidence_percent`` - OCR confidence scaled to 0-100 and rounded
          to two decimals (``0.0`` when nothing was detected or on error).
        * ``debug_text`` - one line per OCR fragment showing the raw text,
          the cleaned text and its confidence, or ``"OCR failed"`` on error.
    """
    try:
        img = np.array(pil_img)

        # Resize very large images so the longest side is at most max_dim.
        max_dim = 1000
        height, width = img.shape[:2]
        longest_side = max(height, width)
        if longest_side > max_dim:
            scale = max_dim / longest_side
            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

        # OCR recognition
        results = reader.readtext(img)
        print("DEBUG OCR RESULTS:", results)

        raw_texts = []
        weight_candidates = []
        fallback = None

        for entry in results:
            text, conf = _split_ocr_entry(entry)
            cleaned = _clean_ocr_text(text)
            raw_texts.append(f"{text} -> {cleaned} (conf: {round(conf, 2)})")

            # More than one dot cannot be a single number; skipping it here
            # also keeps the fallback safe to normalise later.
            if cleaned.count(".") > 1:
                continue

            # First purely numeric fragment is kept in case nothing matches
            # the stricter weight format below.
            if fallback is None and cleaned.replace(".", "").isdigit():
                fallback = (cleaned, conf)

            if _WEIGHT_FORMAT.fullmatch(cleaned):
                weight_candidates.append((cleaned, conf))

        # Choose the most confident well-formed candidate, else the fallback.
        if weight_candidates:
            best_weight, best_conf = max(weight_candidates, key=lambda c: c[1])
        elif fallback is not None:
            best_weight, best_conf = fallback
        else:
            return "Not detected", 0.0, "\n".join(raw_texts)

        best_weight = _strip_leading_zeros(best_weight)
        return best_weight, round(best_conf * 100, 2), "\n".join(raw_texts)

    except Exception as e:
        # Callers receive errors through the return tuple rather than an
        # exception, so every failure is reported in-band here.
        return f"Error: {str(e)}", 0.0, "OCR failed"