Spaces:

Sanjayraju30
/

logger

Sleeping

Sanjayraju30 committed on Jun 30

Commit

af7cef1

verified ·

1 Parent(s): 0f4e1bf

Update ocr_engine.py

Files changed (1) hide show

ocr_engine.py CHANGED Viewed

@@ -1,27 +1,35 @@
 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
-# Load model + processor
 # The processor and the model are both created from the same checkpoint
 # when this module is imported, so later calls reuse the loaded objects.
 _TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"
 processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
 model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
 def extract_weight(image: Image.Image) -> str:
     """Extract a weight reading and its unit from an image via OCR.

     The image is converted to RGB and decoded with the module-level
     ``processor`` and ``model``. All digit and dot characters in the
     recognised text are joined, and the first well-formed number in that
     run is reported as the weight.

     Args:
         image: PIL image to read.

     Returns:
         ``"<number> kg"`` when the recognised text mentions ``kg`` or
         ``kilogram`` (case-insensitive), ``"<number> grams"`` otherwise,
         or ``"No valid weight detected"`` when the text holds no digits.
     """
     # Function-scope import: the module header does not import ``re``.
     import re

     image = image.convert("RGB")
     pixel_values = processor(images=image, return_tensors="pt").pixel_values
     generated_ids = model.generate(pixel_values)
     full_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

     # Joining every digit/dot keeps digits together even when the OCR text
     # separates them with spaces or other characters.
     numeric_chars = "".join(ch for ch in full_text if ch in "0123456789.")

     # The joined run is not necessarily a number ("." or "1.2.3"), so take
     # the first well-formed number from it instead of returning it verbatim.
     number = re.search(r"\d+(?:\.\d+)?|\.\d+", numeric_chars)
     if number is None:
         return "No valid weight detected"
     weight = number.group(0)

     # "kilogram" has no "kg" substring, so it needs its own check; anything
     # else falls back to grams.
     lowered = full_text.lower()
     unit = "kg" if ("kg" in lowered or "kilogram" in lowered) else "grams"
     return f"{weight} {unit}"

 from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
+# Load OCR model once
 # One checkpoint name feeds both loaders, so the processor and the model
 # cannot drift apart if the checkpoint is changed later.
 _TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"
 processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
 model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
 def extract_weight(image: Image.Image) -> str:
     """Extract a weight reading and its unit from an image via OCR.

     The image is converted to RGB and decoded with the module-level
     ``processor`` and ``model``. The recognised text is lowercased, all
     whitespace is removed, and the first number in it is reported together
     with a unit.

     Args:
         image: PIL image to read. ``None`` is tolerated and reported as
             "no weight" instead of raising.

     Returns:
         ``"<number> kg"`` or ``"<number> grams"`` for the first number in
         the recognised text, or ``"No valid weight detected"`` when the
         text contains no digits or no image was given. Grams is the
         default when no unit is recognised.
     """
     # Function-scope import: the module header does not import ``re``.
     import re

     if image is None:
         return "No valid weight detected"

     image = image.convert("RGB")
     pixel_values = processor(images=image, return_tensors="pt").pixel_values
     generated_ids = model.generate(pixel_values)
     full_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

     # Lowercase and drop all whitespace so split-up output such as
     # "1 2 . 5 K g" collapses to "12.5kg".
     full_text_cleaned = "".join(full_text.lower().split())

     # First number, plus the unit directly after it when there is one.
     # Longer unit spellings come first so "kilograms" is not cut to "k...".
     match = re.search(
         r"(\d+(?:\.\d+)?)(kilograms?|kgs?|grams?|g)?",
         full_text_cleaned,
     )
     if match is None:
         return "No valid weight detected"
     weight, suffix = match.groups()

     if suffix:
         # A unit attached to the number wins: removing whitespace can
         # fabricate "kg" elsewhere ("stock grade" -> "stockgrade").
         unit = "kg" if suffix.startswith("k") else "grams"
     elif "kg" in full_text_cleaned or "kilogram" in full_text_cleaned:
         # No unit next to the number; fall back to a mention anywhere.
         unit = "kg"
     else:
         unit = "grams"  # default to grams if not clear

     return f"{weight} {unit}"