File size: 2,225 Bytes
5d670ae
da9f292
5d670ae
da9f292
 
29533d7
5d670ae
da9f292
acddb2f
29533d7
005d086
 
 
 
 
 
29533d7
acddb2f
 
 
 
 
 
29533d7
acddb2f
 
29533d7
3ca006e
29533d7
acddb2f
29533d7
acddb2f
363a646
65ed4c1
363a646
acddb2f
da9f292
acddb2f
3ca006e
 
acddb2f
ee1d691
da9f292
5d670ae
29533d7
 
 
 
 
 
 
005d086
acddb2f
477d4fe
29533d7
acddb2f
 
103f82b
ee1d691
acddb2f
103f82b
29533d7
ee1d691
29533d7
 
 
 
acddb2f
8fe1b94
65ed4c1
2132698
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import easyocr
import numpy as np
import cv2
import re

# Initialize EasyOCR once, at import time, so every OCR call in this module
# shares the same reader. The language list is English only and GPU use is
# explicitly disabled (gpu=False).
reader = easyocr.Reader(['en'], gpu=False)

def enhance_image(img, max_dim=1000):
    """Pre-process an image array for OCR and return a grayscale result.

    The steps run in this order: downscale so the longest side is at most
    ``max_dim`` pixels, convert to grayscale, denoise, sharpen, and apply
    CLAHE contrast enhancement.

    Args:
        img: Image as a NumPy array. It may be 2-D (already grayscale) or
            3-D with 1, 3 (treated as RGB) or 4 (treated as RGBA) channels.
            NOTE(review): assumed to be 8-bit data, as the denoise and CLAHE
            steps are applied directly without any dtype conversion - confirm
            against callers.
        max_dim: Longest allowed side in pixels. Larger images are shrunk
            proportionally; smaller ones are left at their original size.

    Returns:
        The processed single-channel image.
    """
    # Downscale large images (never upscale).
    height, width = img.shape[:2]
    longest_side = max(height, width)
    if longest_side > max_dim:
        scale = max_dim / longest_side
        img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

    # Convert to grayscale. Inputs that are already single-channel must skip
    # cvtColor: the RGB->gray conversion rejects them with an error.
    if img.ndim == 2:
        gray = img
    elif img.shape[2] == 1:
        # Drop the trailing channel axis; copy so OpenCV gets a contiguous array.
        gray = np.ascontiguousarray(img[:, :, 0])
    elif img.shape[2] == 4:
        gray = cv2.cvtColor(img, cv2.COLOR_RGBA2GRAY)
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # Denoise
    gray = cv2.fastNlMeansDenoising(gray, h=15)

    # Sharpen with a 3x3 kernel whose weights sum to 1, so overall brightness
    # is kept while the centre pixel is emphasised against its 4 neighbours.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharp = cv2.filter2D(gray, -1, kernel)

    # Enhance contrast
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = clahe.apply(sharp)

    return enhanced

# Letters that OCR commonly produces in place of digits (applied to lowercased text).
_OCR_DIGIT_MISREADS = str.maketrans({"o": "0", "s": "5", "g": "9"})

# Unit suffix to discard before the misread table is applied.
_WEIGHT_UNIT_RE = re.compile(r"kgs?")

# Accepted weight shape: 2-4 integer digits, optionally 1-2 decimals (84.5, 102.3, 99.9).
_WEIGHT_VALUE_RE = re.compile(r"\d{2,4}(\.\d{1,2})?")


def _normalize_weight_text(text):
    """Reduce one OCR text fragment to only digits and dots."""
    cleaned = text.lower().strip().replace(",", ".")
    # Remove the unit BEFORE fixing misreads; otherwise the "g"/"s" of
    # "kg"/"kgs" would be rewritten to 9/5 and appended to the number
    # (e.g. "84.5kg" -> "84.59").
    cleaned = _WEIGHT_UNIT_RE.sub("", cleaned)
    cleaned = cleaned.translate(_OCR_DIGIT_MISREADS)
    return re.sub(r"[^\d.]", "", cleaned)


def extract_weight_from_image(pil_img):
    """Run OCR on an image and pick the most confident weight-like number.

    The image is converted to a NumPy array, pre-processed with
    ``enhance_image`` and passed to the module-level EasyOCR ``reader``.
    Each recognised text fragment is normalised (comma to dot, ``kg``/``kgs``
    removed, common letter-for-digit misreads fixed, everything except digits
    and dots dropped) and kept as a candidate if it looks like a weight.

    Args:
        pil_img: The image to read. Presumably a PIL image; it only needs to
            be something ``np.array`` can convert - confirm against callers.

    Returns:
        A 3-tuple ``(weight, confidence, raw_text)``:
        - on success: the weight as a string, the OCR confidence scaled to a
          percentage rounded to 2 decimals, and all OCR fragments joined by
          newlines;
        - when no fragment looks like a weight:
          ``("Not detected", 0.0, raw_text)``;
        - when any step raises: ``("Error: <message>", 0.0, "OCR failed")``.
          Exceptions are deliberately not propagated.
    """
    try:
        img = np.array(pil_img)
        enhanced = enhance_image(img)

        # Each result is unpacked as (ignored first element, text, confidence).
        results = reader.readtext(enhanced)
        print("DEBUG OCR RESULTS:", results)

        raw_text = "\n".join(text for _, text, _ in results)

        weight_candidates = []
        for _, text, conf in results:
            cleaned = _normalize_weight_text(text)
            if _WEIGHT_VALUE_RE.fullmatch(cleaned):
                weight_candidates.append((cleaned, conf))

        if not weight_candidates:
            return "Not detected", 0.0, raw_text

        # Pick highest confidence match (the first one wins on ties).
        best_weight, best_conf = max(weight_candidates, key=lambda cand: cand[1])

        # Strip leading zeros, but keep one in front of a bare decimal point
        # so "00.5" becomes "0.5" rather than ".5".
        best_weight = best_weight.lstrip('0') or '0'
        if best_weight.startswith('.'):
            best_weight = '0' + best_weight

        return best_weight, round(best_conf * 100, 2), raw_text

    except Exception as e:
        # Top-level boundary: report the failure to the caller as data.
        return f"Error: {str(e)}", 0.0, "OCR failed"