Spaces:

Sanjayraju30
/

Autoweight

Build error

Sanjayraju30 committed about 1 month ago

Commit

7e1096c

verified ·

1 Parent(s): c316ca4

Update ocr_engine.py

Files changed (1) hide show

ocr_engine.py CHANGED Viewed

@@ -1,21 +1,41 @@
-import easyocr
-import re
-reader = easyocr.Reader(['en'])  # Load once
-def extract_weight_from_image(pil_image):
-    results = reader.readtext(pil_image)
-    weight = None
-    confidence = 0.0
-    for (bbox, text, conf) in results:
-        match = re.search(r'(\d+(\.\d+)?)\s?g', text.lower())
-        if match:
-            weight = match.group(1) + " g"
-            confidence = conf
-            break
-    if weight:
         return weight, confidence
-    else:
-        return "No weight detected", 0.0

+import cv2
+import pytesseract
+import numpy as np
+from PIL import Image
+def extract_weight_from_image(pil_img):
+    try:
+        # Convert PIL image to OpenCV format
+        img = pil_img.convert("RGB")
+        img_np = np.array(img)
+        img_cv = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
+        # Convert to grayscale
+        gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
+        # Adaptive Thresholding for 7-segment LCD
+        processed = cv2.adaptiveThreshold(
+            gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 15, 10
+        )
+        # Resize to enhance small text
+        resized = cv2.resize(processed, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
+        # OCR config tuned for digit blocks
+        config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
+        # Run OCR
+        text = pytesseract.image_to_string(resized, config=config)
+        print("🔍 RAW OCR OUTPUT:", repr(text))
+        # Clean the text
+        weight = ''.join(c for c in text if c in '0123456789.')
+        weight = weight.strip()
+        confidence = 95 if weight else 0
         return weight, confidence
+    except Exception as e:
+        print("❌ OCR Error:", str(e))
+        return "", 0