Spaces:
Running
Running
Update ocr_engine.py
Browse files- ocr_engine.py +26 -22
ocr_engine.py
CHANGED
@@ -9,50 +9,54 @@ def extract_weight_from_image(pil_img):
|
|
9 |
try:
|
10 |
img = np.array(pil_img)
|
11 |
|
12 |
-
# Resize large
|
13 |
max_dim = 1000
|
14 |
height, width = img.shape[:2]
|
15 |
if max(height, width) > max_dim:
|
16 |
scale = max_dim / max(height, width)
|
17 |
img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
|
18 |
|
19 |
-
# Run OCR
|
20 |
results = reader.readtext(img)
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
fallback_conf = 0.0
|
25 |
|
26 |
for item in results:
|
27 |
-
if len(item) != 2:
|
28 |
continue
|
29 |
-
|
30 |
-
if not isinstance(text_data, tuple) or len(text_data) != 2:
|
31 |
-
continue
|
32 |
-
|
33 |
-
text, conf = text_data
|
34 |
cleaned = text.lower().strip()
|
|
|
|
|
35 |
cleaned = cleaned.replace(",", ".")
|
36 |
cleaned = cleaned.replace("o", "0").replace("O", "0")
|
37 |
cleaned = cleaned.replace("s", "5").replace("S", "5")
|
38 |
cleaned = cleaned.replace("g", "9").replace("G", "6")
|
39 |
cleaned = cleaned.replace("kg", "").replace("kgs", "")
|
40 |
-
cleaned = re.sub(r"[
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
45 |
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
-
if
|
50 |
-
best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
|
51 |
-
elif fallback_weight:
|
52 |
-
best_weight, best_conf = fallback_weight, fallback_conf
|
53 |
-
else:
|
54 |
return "Not detected", 0.0
|
55 |
|
|
|
56 |
if "." in best_weight:
|
57 |
int_part, dec_part = best_weight.split(".")
|
58 |
int_part = int_part.lstrip("0") or "0"
|
|
|
9 |
try:
|
10 |
img = np.array(pil_img)
|
11 |
|
12 |
+
# Resize if too large
|
13 |
max_dim = 1000
|
14 |
height, width = img.shape[:2]
|
15 |
if max(height, width) > max_dim:
|
16 |
scale = max_dim / max(height, width)
|
17 |
img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
|
18 |
|
|
|
19 |
results = reader.readtext(img)
|
20 |
|
21 |
+
best_weight = None
|
22 |
+
best_conf = 0.0
|
|
|
23 |
|
24 |
for item in results:
|
25 |
+
if len(item) != 2 or not isinstance(item[1], tuple):
|
26 |
continue
|
27 |
+
text, conf = item[1]
|
|
|
|
|
|
|
|
|
28 |
cleaned = text.lower().strip()
|
29 |
+
|
30 |
+
# Fix misread characters
|
31 |
cleaned = cleaned.replace(",", ".")
|
32 |
cleaned = cleaned.replace("o", "0").replace("O", "0")
|
33 |
cleaned = cleaned.replace("s", "5").replace("S", "5")
|
34 |
cleaned = cleaned.replace("g", "9").replace("G", "6")
|
35 |
cleaned = cleaned.replace("kg", "").replace("kgs", "")
|
36 |
+
cleaned = re.sub(r"[^\d\.]", "", cleaned)
|
37 |
|
38 |
+
# Check for number format like 75.5, 102.3
|
39 |
+
if re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
|
40 |
+
if conf > best_conf:
|
41 |
+
best_weight = cleaned
|
42 |
+
best_conf = conf
|
43 |
|
44 |
+
# If nothing matched, return first numeric string found
|
45 |
+
if not best_weight:
|
46 |
+
for item in results:
|
47 |
+
if len(item) != 2 or not isinstance(item[1], tuple):
|
48 |
+
continue
|
49 |
+
text, conf = item[1]
|
50 |
+
fallback = re.sub(r"[^\d\.]", "", text)
|
51 |
+
if fallback and fallback.replace(".", "").isdigit():
|
52 |
+
best_weight = fallback
|
53 |
+
best_conf = conf
|
54 |
+
break
|
55 |
|
56 |
+
if not best_weight:
|
|
|
|
|
|
|
|
|
57 |
return "Not detected", 0.0
|
58 |
|
59 |
+
# Strip leading zeros
|
60 |
if "." in best_weight:
|
61 |
int_part, dec_part = best_weight.split(".")
|
62 |
int_part = int_part.lstrip("0") or "0"
|