AutoWeightLogger1

Running

App Files Files Community

Sanjayraju30 committed 4 days ago

Commit

fcdea18

verified ·

1 Parent(s): 8ccdb60

Update ocr_engine.py

Browse files

Files changed (1) hide show

ocr_engine.py +29 -6

ocr_engine.py CHANGED Viewed

@@ -5,18 +5,45 @@ import re
 reader = easyocr.Reader(['en'], gpu=False)
 def extract_weight_from_image(pil_img):
     try:
         img = np.array(pil_img)
-        # Resize if too large
         max_dim = 1000
         height, width = img.shape[:2]
         if max(height, width) > max_dim:
             scale = max_dim / max(height, width)
             img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
-        results = reader.readtext(img)
         best_weight = None
         best_conf = 0.0
@@ -27,7 +54,6 @@ def extract_weight_from_image(pil_img):
             text, conf = item[1]
             cleaned = text.lower().strip()
-            # Fix misread characters
             cleaned = cleaned.replace(",", ".")
             cleaned = cleaned.replace("o", "0").replace("O", "0")
             cleaned = cleaned.replace("s", "5").replace("S", "5")
@@ -35,13 +61,11 @@ def extract_weight_from_image(pil_img):
             cleaned = cleaned.replace("kg", "").replace("kgs", "")
             cleaned = re.sub(r"[^\d\.]", "", cleaned)
-            # Check for number format like 75.5, 102.3
             if re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
                 if conf > best_conf:
                     best_weight = cleaned
                     best_conf = conf
-        # If nothing matched, return first numeric string found
         if not best_weight:
             for item in results:
                 if len(item) != 2 or not isinstance(item[1], tuple):
@@ -56,7 +80,6 @@ def extract_weight_from_image(pil_img):
         if not best_weight:
             return "Not detected", 0.0
-        # Strip leading zeros
         if "." in best_weight:
             int_part, dec_part = best_weight.split(".")
             int_part = int_part.lstrip("0") or "0"

 reader = easyocr.Reader(['en'], gpu=False)
+def enhance_image(img):  # Preprocess an image array for OCR; returns a binarized single-channel image.
+    # Convert to grayscale. NOTE(review): the caller passes np.array(pil_img), presumably RGB, yet the BGR code is used here - confirm channel order.
+    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    # Apply a 3x3 sharpening kernel (centre weight 5, four direct neighbours -1, so the weights sum to 1)
+    kernel = np.array([[0, -1, 0], [-1, 5,-1], [0, -1, 0]])
+    sharp = cv2.filter2D(gray, -1, kernel)  # ddepth=-1: output keeps the same depth as the input
+    # Contrast Limited Adaptive Histogram Equalization (CLAHE): local contrast boost on an 8x8 tile grid, clip limit 2.0
+    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
+    contrast = clahe.apply(sharp)
+    # Denoising with non-local means; h is the filter strength (NOTE(review): 30 looks aggressive and may blur thin digit strokes - verify on real samples)
+    denoised = cv2.fastNlMeansDenoising(contrast, h=30)
+    # Adaptive (per-neighbourhood) Gaussian threshold for very dim images: block size 11, constant 2, output pixels are 0 or 255
+    thresh = cv2.adaptiveThreshold(denoised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                   cv2.THRESH_BINARY, 11, 2)
+    return thresh  # binary image that the caller passes to reader.readtext
 def extract_weight_from_image(pil_img):
     try:
         img = np.array(pil_img)
+        # Resize if too large or too small
         max_dim = 1000
         height, width = img.shape[:2]
         if max(height, width) > max_dim:
             scale = max_dim / max(height, width)
             img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
+        elif max(height, width) < 400:
+            scale = 2.5  # Upscale very small images
+            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+        # Enhance image for OCR
+        preprocessed = enhance_image(img)
+        results = reader.readtext(preprocessed)
         best_weight = None
         best_conf = 0.0
             text, conf = item[1]
             cleaned = text.lower().strip()
             cleaned = cleaned.replace(",", ".")
             cleaned = cleaned.replace("o", "0").replace("O", "0")
             cleaned = cleaned.replace("s", "5").replace("S", "5")
             cleaned = cleaned.replace("kg", "").replace("kgs", "")
             cleaned = re.sub(r"[^\d\.]", "", cleaned)
             if re.fullmatch(r"\d{2,4}(\.\d{1,3})?", cleaned):
                 if conf > best_conf:
                     best_weight = cleaned
                     best_conf = conf
         if not best_weight:
             for item in results:
                 if len(item) != 2 or not isinstance(item[1], tuple):
         if not best_weight:
             return "Not detected", 0.0
         if "." in best_weight:
             int_part, dec_part = best_weight.split(".")
             int_part = int_part.lstrip("0") or "0"