AutoWeightLogger1

Sleeping

App Files Community

Sanjayraju30 committed on Jun 12

Commit

2bcb746

verified ·

1 Parent(s): e8bd3b9

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -19

app.py CHANGED Viewed

@@ -10,36 +10,42 @@ import re
 processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
 model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1")
-# Enhance image before OCR
 def enhance_image(image):
     # Convert to grayscale
     image = image.convert("L")
-    # Invert (light text on dark bg works better)
     image = ImageOps.invert(image)
     # Increase contrast and sharpness
-    image = ImageEnhance.Contrast(image).enhance(2.0)
-    image = ImageEnhance.Sharpness(image).enhance(2.0)
-    # Resize (bigger = easier for OCR)
-    image = image.resize((image.width * 2, image.height * 2))
-    # Convert back to RGB for model compatibility
-    image = image.convert("RGB")
-    return image
-# Extract weight
 def detect_weight(image):
     try:
         processed_image = enhance_image(image)
-        # Send to Hugging Face OCR model
         pixel_values = processor(images=processed_image, return_tensors="pt").pixel_values
-        generated_ids = model.generate(pixel_values)
-        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        # Extract number using regex
-        match = re.search(r"(\d{1,4}(?:\.\d{1,2})?)", generated_text)
         weight = match.group(1) if match else "Not detected"
-        # Get current IST time
         ist = pytz.timezone('Asia/Kolkata')
         current_time = datetime.now(ist).strftime("%Y-%m-%d %H:%M:%S")
@@ -52,8 +58,8 @@ interface = gr.Interface(
     fn=detect_weight,
     inputs=gr.Image(type="pil", label="Upload or Capture Image"),
     outputs=[gr.Textbox(label="Weight Info"), gr.Image(label="Snapshot")],
-    title="⚖️ Auto Weight Detector (No Tesseract)",
-    description="Detects weight from digital scale image using Hugging Face TrOCR. Shows weight and capture time (IST)."
 )
 interface.launch()

 processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
 model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1")
+# Preprocess image to enhance OCR accuracy
 def enhance_image(image):
     # Convert to grayscale
     image = image.convert("L")
+    # Invert for better contrast
     image = ImageOps.invert(image)
     # Increase contrast and sharpness
+    image = ImageEnhance.Contrast(image).enhance(2.5)
+    image = ImageEnhance.Sharpness(image).enhance(3.0)
+    # Resize (bigger = easier to read digits clearly)
+    image = image.resize((image.width * 3, image.height * 3))
+    # Convert back to RGB for model
+    return image.convert("RGB")
+# Extract accurate decimal weight
 def detect_weight(image):
     try:
+        # Enhance image
         processed_image = enhance_image(image)
+        # OCR using Hugging Face
         pixel_values = processor(images=processed_image, return_tensors="pt").pixel_values
+        # Use slightly longer decoding to improve accuracy
+        generated_ids = model.generate(
+            pixel_values,
+            max_length=64,
+            num_beams=4,         # Beam search to improve precision
+            early_stopping=True
+        )
+        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
+        # Extract full decimal weight like 52.75 or 18.89
+        match = re.search(r"(\d{1,4}(?:\.\d{1,4})?)", generated_text)
         weight = match.group(1) if match else "Not detected"
+        # Timestamp in IST
         ist = pytz.timezone('Asia/Kolkata')
         current_time = datetime.now(ist).strftime("%Y-%m-%d %H:%M:%S")
     fn=detect_weight,
     inputs=gr.Image(type="pil", label="Upload or Capture Image"),
     outputs=[gr.Textbox(label="Weight Info"), gr.Image(label="Snapshot")],
+    title="⚖️ Auto Weight Detector (Decimal Accurate)",
+    description="Detects full weight including decimals (e.g., 52.75 kg) from digital scale image using Hugging Face OCR."
 )
 interface.launch()