logger2

Running

Sanjayraju30 committed 2 days ago

Commit

1dbcf19

verified ·

1 Parent(s): d5ceb6c

Update ocr_engine.py

Files changed (1) hide show

ocr_engine.py CHANGED Viewed

@@ -1,19 +1,28 @@
 from PIL import Image
-import pytesseract
 import re
-def extract_weight(img_path):  # Removed version: OCR the image at img_path and return "<number> <unit>" or a fallback message
-    img = Image.open(img_path).convert("L")  # Open the file and convert to mode "L" (grayscale)
-    # Run Tesseract OCR on the grayscale image
-    text = pytesseract.image_to_string(img, config='--psm 6')  # Tesseract invoked with page segmentation mode 6
-    text = text.lower().replace('\n', ' ').strip()  # Lowercase, join lines with spaces, trim the ends
-    # Find the first number (decimal or integer) followed by a unit (e.g., 52.25 g, 75.8 kg)
-    match = re.search(r'(\d+\.\d+|\d+)\s*(kg|g)', text)  # "kg" is tried before "g"; whitespace before the unit is optional
-    if match:
-        number = match.group(1)  # Numeric part, kept as a string
-        unit = match.group(2)  # "kg" or "g"
-        return f"{number} {unit}"
-    else:
-        return "Weight not detected"  # Fallback string when no number+unit pair is found

+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
+import torch
 import re
+# Load the TrOCR processor and model once, at module import, from the "microsoft/trocr-base-handwritten" checkpoint
+processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")  # Used below for image preprocessing and for decoding generated ids
+model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")  # Used below to generate token ids from pixel values
+def extract_weight(image):  # Run TrOCR on `image`; return "<value> <unit>", "No valid weight found", or "Error: <message>"
+    try:
+        # OCR inference: preprocess the image, generate token ids, decode them to text
+        pixel_values = processor(images=image, return_tensors="pt").pixel_values  # "pt" requests PyTorch tensors; presumably `image` is a PIL image — TODO confirm against caller
+        generated_ids = model.generate(pixel_values)  # Generation with the model's default settings
+        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()  # Only the first decoded sequence is used
+        print("OCR Output:", text)  # NOTE(review): looks like debug output written to stdout on every call
+        # Match 1-5 digits, an optional 1-3 digit decimal part, optional whitespace, then a unit (kg or g), on the lowercased text
+        match = re.search(r'(\d{1,5}(?:\.\d{1,3})?)\s*(kg|g)', text.lower())  # NOTE(review): no word boundary after the unit, so e.g. "5 grams" matches as "5 g"
+        if match:
+            value = match.group(1)  # Numeric part, kept as a string
+            unit = match.group(2)  # "kg" or "g"
+            return f"{value} {unit}"
+        else:
+            return "No valid weight found"  # OCR produced text, but no number+unit pair was found in it
+    except Exception as e:  # Any failure above is reported to the caller as a string rather than raised
+        return f"Error: {str(e)}"