logger1

Running

Sanjayraju30 committed 10 days ago

Commit

513f893

verified ·

1 Parent(s): 0cf36c5

Update ocr_engine.py

Files changed (1) hide show

ocr_engine.py CHANGED Viewed

@@ -1,23 +1,22 @@
-import cv2
-import pytesseract
-import numpy as np
 from PIL import Image
def extract_weight(pil_image: Image.Image) -> str:
    """Read a numeric weight from an image using Tesseract OCR.

    The image is converted to grayscale, upscaled 2x, blurred and
    Otsu-thresholded, then passed to Tesseract with ``--psm 7`` and a
    character whitelist restricted to digits and the decimal point.

    Args:
        pil_image: The image to read.

    Returns:
        The first well-formed decimal number found in the OCR output (for
        example ``"72.5"`` or ``".5"``), or ``"No valid weight detected"``
        when the output contains no digits.
    """
    import re  # local import so the block does not depend on file-level edits

    # Convert to OpenCV format
    rgb = np.array(pil_image.convert("RGB"))
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

    # Enhance image for OCR: upscale, smooth, then binarise with Otsu
    gray = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # OCR with config
    config = "--psm 7 -c tessedit_char_whitelist=0123456789."
    text = pytesseract.image_to_string(thresh, config=config)

    # Keep only digits and dots, then take the first well-formed number.
    # Returning the filtered string as-is would accept values such as ".",
    # "12." or "1.2.3" as a weight.
    candidate = "".join(ch for ch in text if ch in "0123456789.")
    match = re.search(r"\d+(?:\.\d+)?|\.\d+", candidate)
    return match.group(0) if match else "No valid weight detected"

+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
# Load the processor and model once at import time so that every call to
# extract_weight() reuses them instead of reloading the weights.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1")


def extract_weight(image: Image.Image) -> str:
    """Read a numeric weight from an image using the module-level TrOCR model.

    Args:
        image: The image to read; it is converted to RGB before processing.

    Returns:
        The first well-formed decimal number found in the decoded text (for
        example ``"72.5"`` or ``".5"``), or ``"No valid weight detected"``
        when the decoded text contains no digits.
    """
    import re  # local import so the block does not depend on file-level edits

    # Convert image to RGB just in case
    rgb_image = image.convert("RGB")

    # Preprocess + generate prediction
    pixel_values = processor(images=rgb_image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)

    # Decode output; a single image is passed in, so take the first result
    text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

    # Keep only digits and dots, then take the first well-formed number.
    # Returning the filtered string as-is would accept values such as ".",
    # "12." or "1.2.3" as a weight.
    candidate = "".join(ch for ch in text if ch in "0123456789.")
    match = re.search(r"\d+(?:\.\d+)?|\.\d+", candidate)
    return match.group(0) if match else "No valid weight detected"