Spaces:
Runtime error
Runtime error
Update ocr_engine.py
Browse files- ocr_engine.py +10 -8
ocr_engine.py
CHANGED
@@ -7,19 +7,21 @@ def extract_weight_from_image(pil_img):
|
|
7 |
try:
|
8 |
img = np.array(pil_img)
|
9 |
|
10 |
-
#
|
11 |
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
|
12 |
-
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
15 |
|
16 |
# OCR using pytesseract
|
17 |
-
|
18 |
-
text = pytesseract.image_to_string(thresh, config=config)
|
19 |
print("OCR Output:", text)
|
20 |
|
21 |
-
# Regex to
|
22 |
-
match = re.search(r"\b\d{
|
23 |
if match:
|
24 |
return match.group(), 95.0
|
25 |
else:
|
|
|
7 |
try:
|
8 |
img = np.array(pil_img)
|
9 |
|
10 |
+
# Convert to grayscale
|
11 |
gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
|
12 |
+
|
13 |
+
# Resize for better OCR
|
14 |
+
gray = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
|
15 |
+
|
16 |
+
# Thresholding
|
17 |
+
_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
18 |
|
19 |
# OCR using pytesseract
|
20 |
+
text = pytesseract.image_to_string(thresh, config='--psm 6 digits')
|
|
|
21 |
print("OCR Output:", text)
|
22 |
|
23 |
+
# Regex to extract weight (like 54.20, 102.5, etc.)
|
24 |
+
match = re.search(r"\b\d{2,4}\.?\d{0,2}\b", text)
|
25 |
if match:
|
26 |
return match.group(), 95.0
|
27 |
else:
|