Auto-weight-logger2

Sleeping

Sanjayraju30 committed on May 21

Commit

1a4ff0a

verified ·

1 Parent(s): 844e1ad

Update src/ocr_engine.py

Files changed (1) hide show

src/ocr_engine.py CHANGED Viewed

@@ -1,10 +1,22 @@
-import pytesseract
-from PIL import Image
-import re
-def extract_weight_from_image(image):
-    """Return the first two-decimal number that OCR finds in ``image``.
-
-    The image is passed to ``pytesseract.image_to_string`` and the resulting
-    text is searched for one or more digits, a dot and two digits.
-
-    Returns:
-        The matched text as a string, or the literal string
-        ``"No weight found"`` when the OCR output contains no such number.
-    """
-    text = pytesseract.image_to_string(image)
-    match = re.search(r"\d+\.\d{2}", text)
-    if match:
-        return match.group()
-    return "No weight found"

+def extract_weight_from_image(pil_img):
+    """Read the numeric weight shown in an image using Tesseract OCR.
+
+    The image is converted to grayscale, blurred with a 3x3 Gaussian kernel
+    and binarised with Otsu's threshold, then passed to Tesseract with a
+    character whitelist of digits and the decimal point.
+
+    Args:
+        pil_img: Image object exposing ``convert("RGB")`` (e.g. a PIL image).
+
+    Returns:
+        A ``(text, avg_conf)`` tuple. ``text`` contains only the recognised
+        characters from ``0123456789.`` (empty when nothing was read).
+        ``avg_conf`` is the mean of the non-negative confidences reported by
+        Tesseract, or ``0`` when there are none.
+    """
+    import numpy as np
+    import pytesseract
+    try:
+        import cv2
+    except ImportError:
+        # Presumably cv2 cannot load without the system libGL package; install
+        # it only when the import actually fails instead of on every call.
+        import os
+        os.system("apt-get update && apt-get install -y libgl1-mesa-glx")
+        import cv2
+    image = np.array(pil_img.convert("RGB"))
+    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+    blur = cv2.GaussianBlur(gray, (3, 3), 0)
+    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+    config = '--psm 7 -c tessedit_char_whitelist=0123456789.'
+    data = pytesseract.image_to_data(thresh, config=config, output_type=pytesseract.Output.DICT)
+    allowed = set('0123456789.')
+    extracted_text = ''.join(
+        ch for token in data['text'] for ch in str(token) if ch in allowed
+    )
+    # Confidence entries may arrive as ints, floats or strings depending on
+    # the pytesseract version, so parse them uniformly; negative values are
+    # skipped, as the original digit-only check also excluded them.
+    confidences = []
+    for conf in data['conf']:
+        try:
+            value = float(conf)
+        except (TypeError, ValueError):
+            continue
+        if value >= 0:
+            confidences.append(value)
+    avg_conf = sum(confidences) / len(confidences) if confidences else 0
+    return extracted_text, avg_conf