Spaces:
Build error
Build error
Update ocr_engine.py
Browse files- ocr_engine.py +10 -12
ocr_engine.py
CHANGED
@@ -4,39 +4,37 @@ import cv2
|
|
4 |
import numpy as np
|
5 |
from PIL import Image
|
6 |
|
7 |
-
# Initialize EasyOCR reader
|
8 |
reader = easyocr.Reader(['en'], gpu=False)
|
9 |
|
10 |
def preprocess_image(image):
|
11 |
# Convert to grayscale
|
12 |
gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
|
13 |
|
14 |
-
#
|
15 |
-
|
16 |
-
|
17 |
|
18 |
-
|
19 |
-
kernel = np.ones((2, 2), np.uint8)
|
20 |
-
dilated = cv2.dilate(thresh, kernel, iterations=1)
|
21 |
-
|
22 |
-
return dilated
|
23 |
|
24 |
def extract_weight_from_image(pil_image):
|
25 |
try:
|
26 |
-
# Convert PIL to OpenCV
|
27 |
image = np.array(pil_image.convert("RGB"))
|
28 |
processed = preprocess_image(image)
|
29 |
|
30 |
# OCR
|
31 |
result = reader.readtext(processed)
|
|
|
32 |
|
33 |
-
# Filter and extract digits like weight (e.g., 75.5)
|
34 |
weight = None
|
35 |
confidence = 0.0
|
|
|
36 |
for detection in result:
|
37 |
text = detection[1]
|
38 |
conf = detection[2]
|
39 |
-
|
|
|
40 |
if match:
|
41 |
weight = match.group()
|
42 |
confidence = conf
|
|
|
4 |
import numpy as np
|
5 |
from PIL import Image
|
6 |
|
7 |
+
# Initialize EasyOCR reader
|
8 |
reader = easyocr.Reader(['en'], gpu=False)
|
9 |
|
10 |
def preprocess_image(image):
    """Binarize an image for OCR: grayscale, light blur, then Otsu threshold.

    Args:
        image: NumPy array holding the picture. An array with a channel
            axis is converted with ``cv2.COLOR_RGB2GRAY`` (so RGB channel
            order is expected); a 2-D array is treated as already grayscale.

    Returns:
        The single-channel image returned by ``cv2.threshold`` with
        ``THRESH_BINARY + THRESH_OTSU`` and a max value of 255.
    """
    # cv2.cvtColor rejects single-channel input for RGB2GRAY, so only
    # convert when the array actually carries a channel axis.
    if getattr(image, "ndim", None) == 2:
        gray = image
    else:
        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Blur + Otsu thresholding. The threshold argument (0) is a placeholder:
    # with THRESH_OTSU set, OpenCV picks the threshold itself.
    blur = cv2.GaussianBlur(gray, (3, 3), 0)
    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    return thresh
|
|
|
|
|
|
|
|
|
19 |
|
20 |
def extract_weight_from_image(pil_image):
|
21 |
try:
|
22 |
+
# Convert PIL image to OpenCV image
|
23 |
image = np.array(pil_image.convert("RGB"))
|
24 |
processed = preprocess_image(image)
|
25 |
|
26 |
# OCR
|
27 |
result = reader.readtext(processed)
|
28 |
+
print("OCR Results:", result) # for debugging
|
29 |
|
|
|
30 |
weight = None
|
31 |
confidence = 0.0
|
32 |
+
|
33 |
for detection in result:
|
34 |
text = detection[1]
|
35 |
conf = detection[2]
|
36 |
+
|
37 |
+
match = re.search(r"\b\d+(\.\d+)?\b", text) # more flexible matching
|
38 |
if match:
|
39 |
weight = match.group()
|
40 |
confidence = conf
|