AutoWeightLogger1

Running

App Files Community

Sanjayraju30 committed 4 days ago

Commit

8ccdb60

verified ·

1 Parent(s): d736dc4

Update ocr_engine.py

Browse files

Files changed (1) hide show

ocr_engine.py +26 -22

ocr_engine.py CHANGED Viewed

@@ -9,50 +9,54 @@ def extract_weight_from_image(pil_img):
     try:
         img = np.array(pil_img)
-        # Resize large images
         max_dim = 1000
         height, width = img.shape[:2]
         if max(height, width) > max_dim:
             scale = max_dim / max(height, width)
             img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
-        # Run OCR
         results = reader.readtext(img)
-        weight_candidates = []
-        fallback_weight = None
-        fallback_conf = 0.0
         for item in results:
-            if len(item) != 2:
                 continue
-            text_data = item[1]
-            if not isinstance(text_data, tuple) or len(text_data) != 2:
-                continue
-            text, conf = text_data
             cleaned = text.lower().strip()
             cleaned = cleaned.replace(",", ".")
             cleaned = cleaned.replace("o", "0").replace("O", "0")
             cleaned = cleaned.replace("s", "5").replace("S", "5")
             cleaned = cleaned.replace("g", "9").replace("G", "6")
             cleaned = cleaned.replace("kg", "").replace("kgs", "")
-            cleaned = re.sub(r"[^0-9\.]", "", cleaned)
-            if cleaned and cleaned.replace(".", "").isdigit() and not fallback_weight:
-                fallback_weight = cleaned
-                fallback_conf = conf
-            if cleaned.count(".") <= 1 and re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
-                weight_candidates.append((cleaned, conf))
-        if weight_candidates:
-            best_weight, best_conf = sorted(weight_candidates, key=lambda x: -x[1])[0]
-        elif fallback_weight:
-            best_weight, best_conf = fallback_weight, fallback_conf
-        else:
             return "Not detected", 0.0
         if "." in best_weight:
             int_part, dec_part = best_weight.split(".")
             int_part = int_part.lstrip("0") or "0"

     try:
         img = np.array(pil_img)
+        # Resize if too large
         max_dim = 1000
         height, width = img.shape[:2]
         if max(height, width) > max_dim:
             scale = max_dim / max(height, width)
             img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
         results = reader.readtext(img)
+        best_weight = None
+        best_conf = 0.0
         for item in results:
+            if len(item) != 2 or not isinstance(item[1], tuple):
                 continue
+            text, conf = item[1]
             cleaned = text.lower().strip()
+            # Fix misread characters
             cleaned = cleaned.replace(",", ".")
             cleaned = cleaned.replace("o", "0").replace("O", "0")
             cleaned = cleaned.replace("s", "5").replace("S", "5")
             cleaned = cleaned.replace("g", "9").replace("G", "6")
             cleaned = cleaned.replace("kg", "").replace("kgs", "")
+            cleaned = re.sub(r"[^\d\.]", "", cleaned)
+            # Check for number format like 75.5, 102.3
+            if re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
+                if conf > best_conf:
+                    best_weight = cleaned
+                    best_conf = conf
+        # If nothing matched, return first numeric string found
+        if not best_weight:
+            for item in results:
+                if len(item) != 2 or not isinstance(item[1], tuple):
+                    continue
+                text, conf = item[1]
+                fallback = re.sub(r"[^\d\.]", "", text)
+                if fallback and fallback.replace(".", "").isdigit():
+                    best_weight = fallback
+                    best_conf = conf
+                    break
+        if not best_weight:
             return "Not detected", 0.0
+        # Strip leading zeros
         if "." in best_weight:
             int_part, dec_part = best_weight.split(".")
             int_part = int_part.lstrip("0") or "0"