Spaces:
Running
Running
Update ocr_engine.py
Browse files- ocr_engine.py +16 -9
ocr_engine.py
CHANGED
@@ -10,7 +10,7 @@ model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwrit
|
|
10 |
def clean_ocr_text(text):
|
11 |
# Fix common OCR misreads
|
12 |
text = text.replace(",", ".").replace("s", "5").replace("o", "0").replace("O", "0")
|
13 |
-
return re.sub(r"[^\d.kg]", "", text.lower())
|
14 |
|
15 |
def restore_decimal(text):
|
16 |
if re.fullmatch(r"\d{5}", text):
|
@@ -25,11 +25,11 @@ def extract_unit_from_text(raw_text):
|
|
25 |
return "kg"
|
26 |
elif "g" in raw_text:
|
27 |
return "g"
|
28 |
-
return "g" # fallback if unit
|
29 |
|
30 |
def extract_weight(image):
|
31 |
try:
|
32 |
-
#
|
33 |
image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
|
34 |
image = image.filter(ImageFilter.SHARPEN)
|
35 |
|
@@ -40,17 +40,24 @@ def extract_weight(image):
|
|
40 |
|
41 |
cleaned = clean_ocr_text(raw_text)
|
42 |
|
43 |
-
#
|
44 |
match = re.search(r"(\d{1,3}\.\d{1,3})\s*(kg|g)?", cleaned)
|
45 |
if match:
|
46 |
return f"{match.group(1)} {match.group(2) or ''}".strip(), raw_text
|
47 |
|
48 |
-
#
|
49 |
-
|
50 |
-
if
|
51 |
-
decimal_fixed = restore_decimal(
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
return "Error: No valid weight found", raw_text
|
|
|
55 |
except Exception as e:
|
56 |
return f"Error: {str(e)}", ""
|
|
|
10 |
def clean_ocr_text(text):
    """Normalize raw OCR output to the characters a weight reading can contain.

    Args:
        text: Raw string produced by the OCR step.

    Returns:
        A lowercase string holding only digits, ``.``, ``k`` and ``g``.
    """
    # Lowercase first so the letter-to-digit fixes below cover both cases.
    # Previously "O"/"o" were both mapped to "0" but only lowercase "s" was
    # mapped to "5"; an uppercase "S" was dropped by the final filter instead.
    text = text.lower()
    # Fix common OCR misreads: decimal comma, "s" read for 5, "o" read for 0.
    text = text.replace(",", ".").replace("s", "5").replace("o", "0")
    # Drop everything that is not a digit, a decimal point, or a unit letter.
    return re.sub(r"[^\d.kg]", "", text)
|
14 |
|
15 |
def restore_decimal(text):
|
16 |
if re.fullmatch(r"\d{5}", text):
|
|
|
25 |
return "kg"
|
26 |
elif "g" in raw_text:
|
27 |
return "g"
|
28 |
+
return "g" # fallback if no unit
|
29 |
|
30 |
def extract_weight(image):
|
31 |
try:
|
32 |
+
# Resize & sharpen image
|
33 |
image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
|
34 |
image = image.filter(ImageFilter.SHARPEN)
|
35 |
|
|
|
40 |
|
41 |
cleaned = clean_ocr_text(raw_text)
|
42 |
|
43 |
+
# Case 1: Match decimal with unit
|
44 |
match = re.search(r"(\d{1,3}\.\d{1,3})\s*(kg|g)?", cleaned)
|
45 |
if match:
|
46 |
return f"{match.group(1)} {match.group(2) or ''}".strip(), raw_text
|
47 |
|
48 |
+
# Case 2: Large number fallback like 53255 → 52.255
|
49 |
+
match = re.search(r"\d{4,5}", cleaned)
|
50 |
+
if match:
|
51 |
+
decimal_fixed = restore_decimal(match.group())
|
52 |
+
unit = extract_unit_from_text(raw_text)
|
53 |
+
return f"{decimal_fixed} {unit}", raw_text
|
54 |
+
|
55 |
+
# Final fallback: plain number
|
56 |
+
match = re.search(r"\d+", cleaned)
|
57 |
+
if match:
|
58 |
+
return f"{match.group()} g", raw_text
|
59 |
|
60 |
return "Error: No valid weight found", raw_text
|
61 |
+
|
62 |
except Exception as e:
|
63 |
return f"Error: {str(e)}", ""
|