AutoWeightLogger1

Sleeping

App Files Community

Sanjayraju30 committed 24 days ago

Commit

e790db4

verified ·

1 Parent(s): 3bd13bb

Update ocr_engine.py

Browse files

Files changed (1) hide show

ocr_engine.py +18 -9

ocr_engine.py CHANGED Viewed

@@ -10,9 +10,15 @@ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# Initialize TrOCR
-processor = TrOCRProcessor.from_pretrained("microsoft/trocr-small-printed")
-model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-small-printed")
 # Directory for debug images
 DEBUG_DIR = "debug_images"
@@ -122,13 +128,16 @@ def detect_roi(img):
 def perform_ocr(img):
     """Perform OCR using TrOCR for digital displays."""
     try:
         # Convert to PIL for TrOCR
         pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
         save_debug_image(pil_img, "06_ocr_input")
         # Process image with TrOCR
         pixel_values = processor(pil_img, return_tensors="pt").pixel_values
-        generated_ids = model.generate(pixel_values)
         text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
         logging.info(f"TrOCR raw output: {text}")
@@ -139,7 +148,7 @@ def perform_ocr(img):
         text = text.strip('.')
         if text and re.fullmatch(r"^\d*\.?\d*$", text):
             text = text.lstrip('0') or '0'
-            confidence = 95.0 if len(text.replace('.', '')) > 1 else 90.0
             logging.info(f"Validated text: {text}, Confidence: {confidence:.2f}%")
             return text, confidence
         logging.info(f"Text '{text}' failed validation.")
@@ -156,17 +165,17 @@ def extract_weight_from_image(pil_img):
         save_debug_image(img, "00_input_image")
         img = correct_rotation(img)
         brightness = estimate_brightness(img)
-        conf_threshold = 0.6 if brightness > 100 else 0.4
         roi_img, roi_bbox = detect_roi(img)
         if roi_bbox:
-            conf_threshold *= 1.2 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.3) else 1.0
         result, confidence = perform_ocr(roi_img)
         if result and confidence >= conf_threshold * 100:
             try:
                 weight = float(result)
-                if 0.00001 <= weight <= 10000:
                     logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
                     return result, confidence
                 logging.warning(f"Weight {result} out of range.")
@@ -178,7 +187,7 @@ def extract_weight_from_image(pil_img):
         if result and confidence >= conf_threshold * 0.9 * 100:
             try:
                 weight = float(result)
-                if 0.00001 <= weight <= 10000:
                     logging.info(f"Full image weight: {result} kg, Confidence: {confidence:.2f}%")
                     return result, confidence
                 logging.warning(f"Full image weight {result} out of range.")

 # Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Initialize TrOCR with error handling
+try:
+    processor = TrOCRProcessor.from_pretrained("microsoft/trocr-small-printed")
+    model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-small-printed")
+    logging.info("TrOCR model and processor loaded successfully")
+except Exception as e:
+    logging.error(f"Failed to load TrOCR model: {str(e)}")
+    processor = None
+    model = None
 # Directory for debug images
 DEBUG_DIR = "debug_images"
 def perform_ocr(img):
     """Perform OCR using TrOCR for digital displays."""
+    if processor is None or model is None:
+        logging.error("TrOCR model not loaded, cannot perform OCR.")
+        return None, 0.0
     try:
         # Convert to PIL for TrOCR
         pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
         save_debug_image(pil_img, "06_ocr_input")
         # Process image with TrOCR
         pixel_values = processor(pil_img, return_tensors="pt").pixel_values
+        generated_ids = model.generate(pixel_values, max_length=10)
         text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
         logging.info(f"TrOCR raw output: {text}")
         text = text.strip('.')
         if text and re.fullmatch(r"^\d*\.?\d*$", text):
             text = text.lstrip('0') or '0'
+            confidence = 95.0 if len(text.replace('.', '')) >= 2 else 85.0
             logging.info(f"Validated text: {text}, Confidence: {confidence:.2f}%")
             return text, confidence
         logging.info(f"Text '{text}' failed validation.")
         save_debug_image(img, "00_input_image")
         img = correct_rotation(img)
         brightness = estimate_brightness(img)
+        conf_threshold = 0.7 if brightness > 100 else 0.5
         roi_img, roi_bbox = detect_roi(img)
         if roi_bbox:
+            conf_threshold *= 1.1 if (roi_bbox[2] * roi_bbox[3]) > (img.shape[0] * img.shape[1] * 0.3) else 1.0
         result, confidence = perform_ocr(roi_img)
         if result and confidence >= conf_threshold * 100:
             try:
                 weight = float(result)
+                if 0.01 <= weight <= 1000:  # Narrowed range for typical scale weights
                     logging.info(f"Detected weight: {result} kg, Confidence: {confidence:.2f}%")
                     return result, confidence
                 logging.warning(f"Weight {result} out of range.")
         if result and confidence >= conf_threshold * 0.9 * 100:
             try:
                 weight = float(result)
+                if 0.01 <= weight <= 1000:
                     logging.info(f"Full image weight: {result} kg, Confidence: {confidence:.2f}%")
                     return result, confidence
                 logging.warning(f"Full image weight {result} out of range.")