Spaces:
Running
Running
Update ocr_engine.py
Browse files- ocr_engine.py +21 -15
ocr_engine.py
CHANGED
@@ -3,20 +3,14 @@ from PIL import Image, ImageFilter
|
|
3 |
import torch
|
4 |
import re
|
5 |
|
|
|
6 |
# Load the TrOCR processor and model from one shared pretrained checkpoint.
_TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
|
8 |
|
9 |
def clean_ocr_text(text):
    """Normalize raw OCR output down to digits, dots and the letters k/g.

    The text is lower-cased first so the misread corrections apply to
    both cases; then ``,`` becomes ``.``, ``s`` becomes ``5`` and ``o``
    becomes ``0``. Every character other than a digit, ``.``, ``k`` or
    ``g`` is removed afterwards.

    Args:
        text: Raw string produced by the OCR model.

    Returns:
        The cleaned, lower-case string (possibly empty).
    """
    # Lower-case before substituting: an upper-case "S" used to skip the
    # "s" -> "5" correction and was then stripped, silently losing a digit.
    text = text.lower().replace(",", ".").replace("s", "5").replace("o", "0")
    return re.sub(r"[^\d.kg]", "", text)  # keep digits, dot, k, g
|
12 |
-
|
13 |
-
def extract_unit_from_text(raw_text):
    """Return the weight unit mentioned in *raw_text*.

    The check is case-insensitive. ``"kg"`` is returned when the text
    contains "kg"; otherwise ``"g"`` is returned, both when a lone "g"
    is present and as the default when no unit is found.
    """
    # A separate test for "g" would return the same value as the default,
    # so a single test for "kg" is sufficient.
    return "kg" if "kg" in raw_text.lower() else "g"
|
20 |
|
21 |
def restore_decimal(text):
|
22 |
if re.fullmatch(r"\d{5}", text):
|
@@ -25,26 +19,38 @@ def restore_decimal(text):
|
|
25 |
return f"{text[:2]}.{text[2:]}"
|
26 |
return text
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
def extract_weight(image):
|
29 |
try:
|
|
|
30 |
image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
|
31 |
image = image.filter(ImageFilter.SHARPEN)
|
32 |
|
|
|
33 |
pixel_values = processor(images=image, return_tensors="pt").pixel_values
|
34 |
generated_ids = model.generate(pixel_values)
|
35 |
raw_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
|
36 |
|
37 |
cleaned = clean_ocr_text(raw_text)
|
38 |
|
|
|
39 |
match = re.search(r"(\d{1,3}\.\d{1,3})\s*(kg|g)?", cleaned)
|
40 |
if match:
|
41 |
-
return f"{match.group(1)} {match.group(2) or ''}", raw_text
|
42 |
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
47 |
|
48 |
-
return
|
49 |
except Exception as e:
|
50 |
return f"Error: {str(e)}", ""
|
|
|
3 |
import torch
|
4 |
import re
|
5 |
|
6 |
+
# Load the TrOCR processor and model from one shared pretrained checkpoint.
_TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
|
9 |
|
10 |
def clean_ocr_text(text):
    """Normalize raw OCR output down to digits, dots and the letters k/g.

    The text is lower-cased first so the misread corrections apply to
    both cases; then ``,`` becomes ``.``, ``s`` becomes ``5`` and ``o``
    becomes ``0``. Every character other than a digit, ``.``, ``k`` or
    ``g`` is removed afterwards.

    Args:
        text: Raw string produced by the OCR model.

    Returns:
        The cleaned, lower-case string (possibly empty).
    """
    # Fix common OCR misreads. Lower-case before substituting: an upper-case
    # "S" used to skip the "s" -> "5" correction and was then stripped,
    # silently losing a digit.
    text = text.lower().replace(",", ".").replace("s", "5").replace("o", "0")
    return re.sub(r"[^\d.kg]", "", text)  # keep digits, dot, k, g
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
def restore_decimal(text):
|
16 |
if re.fullmatch(r"\d{5}", text):
|
|
|
19 |
return f"{text[:2]}.{text[2:]}"
|
20 |
return text
|
21 |
|
22 |
+
def extract_unit_from_text(raw_text):
    """Return the weight unit mentioned in *raw_text*.

    The check is case-insensitive. ``"kg"`` is returned when the text
    contains "kg"; otherwise ``"g"`` is returned, both when a lone "g"
    is present and as the fallback when no unit is found.
    """
    # A separate test for "g" would return the same value as the fallback,
    # so a single test for "kg" is sufficient.
    return "kg" if "kg" in raw_text.lower() else "g"
|
29 |
+
|
30 |
def extract_weight(image):
    """Read a weight value from an image using the TrOCR model.

    The image is converted to RGB, upscaled 2x and sharpened, then run
    through the module-level ``processor`` and ``model``. The decoded text
    is normalized with ``clean_ocr_text``. A decimal number with an
    optional ``kg``/``g`` suffix is preferred; failing that, a run of 4-5
    digits is handed to ``restore_decimal``.

    Args:
        image: The image to read. Presumably a ``PIL.Image.Image`` (the
            code calls ``resize``/``filter`` on it); ``None`` is reported
            as an error.

    Returns:
        A ``(result, raw_text)`` tuple of strings. ``result`` is the
        weight (with its unit when one was matched) or a message starting
        with ``"Error:"``. ``raw_text`` is the decoded OCR text, or ``""``
        when OCR did not run or an exception was raised.
    """
    # Report a missing image explicitly instead of leaking an
    # AttributeError message through the generic handler below.
    if image is None:
        return "Error: No image provided", ""

    try:
        # Enhance image. Convert to RGB first: palette images cannot be
        # filtered by Pillow, and the TrOCR examples feed RGB input.
        image = image.convert("RGB")
        image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
        image = image.filter(ImageFilter.SHARPEN)

        # OCR inference
        pixel_values = processor(images=image, return_tensors="pt").pixel_values
        generated_ids = model.generate(pixel_values)
        raw_text = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0].strip()

        cleaned = clean_ocr_text(raw_text)

        # Try direct match (e.g., 52.25 kg or 250.5g)
        match = re.search(r"(\d{1,3}\.\d{1,3})\s*(kg|g)?", cleaned)
        if match:
            return f"{match.group(1)} {match.group(2) or ''}".strip(), raw_text

        # Fallback if no decimal point was read: take a 4-5 digit run
        # (e.g. 53255) and let restore_decimal re-insert the point.
        fallback_match = re.search(r"\d{4,5}", cleaned)
        if fallback_match:
            decimal_fixed = restore_decimal(fallback_match.group())
            return decimal_fixed, raw_text

        return "Error: No valid weight found", raw_text
    except Exception as e:
        # Deliberate catch-all: callers receive an error string rather
        # than an exception.
        return f"Error: {e}", ""
|