"""Gradio app that reads the weight shown in a digital-scale photo via TrOCR."""

import re
from datetime import datetime
from typing import Optional, Tuple

import gradio as gr
import pytz
import torch  # noqa: F401  (imported by the original script; kept in scope)
from PIL import Image, ImageEnhance, ImageOps
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Load model and processor once at import time so every request reuses them.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1")

# First run of 1-4 digits, optionally followed by "." and 1-4 more digits
# (e.g. 52.75 or 18.89). Compiled once instead of on every request.
_WEIGHT_PATTERN = re.compile(r"(\d{1,4}(?:\.\d{1,4})?)")

# Capture timestamps are reported in Indian Standard Time.
_IST = pytz.timezone("Asia/Kolkata")


def enhance_image(image: Image.Image) -> Image.Image:
    """Preprocess an image to enhance OCR accuracy.

    Steps, in order: grayscale conversion, inversion, contrast boost (2.5x),
    sharpness boost (3.0x), 3x upscale, and conversion back to RGB.

    Args:
        image: The PIL image to preprocess.

    Returns:
        The processed image in RGB mode, three times the input's width and
        height.
    """
    # Convert to grayscale
    image = image.convert("L")

    # Invert for better contrast
    image = ImageOps.invert(image)

    # Increase contrast and sharpness
    image = ImageEnhance.Contrast(image).enhance(2.5)
    image = ImageEnhance.Sharpness(image).enhance(3.0)

    # Resize (bigger = easier to read digits clearly)
    image = image.resize((image.width * 3, image.height * 3))

    # Convert back to RGB for model
    return image.convert("RGB")


def _extract_weight(text: str) -> Optional[str]:
    """Return the first decimal number found in *text*, or None if there is none."""
    match = _WEIGHT_PATTERN.search(text)
    return match.group(1) if match else None


def detect_weight(image: Optional[Image.Image]) -> Tuple[str, Optional[Image.Image]]:
    """Run OCR on a scale photo and report the weight with an IST timestamp.

    Args:
        image: The PIL image supplied by the Gradio input, or ``None`` when
            the form is submitted without an image.

    Returns:
        A ``(message, image)`` tuple. ``message`` holds the detected weight
        and capture time, or starts with ``"Error:"`` when no image was given
        or processing failed. ``image`` is the input, passed through
        unchanged for display.
    """
    # Give a clear message instead of letting ``None.convert`` raise a
    # cryptic AttributeError further down.
    if image is None:
        return "Error: No image provided", None

    try:
        # Enhance image
        processed_image = enhance_image(image)

        # OCR using Hugging Face
        pixel_values = processor(
            images=processed_image, return_tensors="pt"
        ).pixel_values

        # Use slightly longer decoding to improve accuracy
        generated_ids = model.generate(
            pixel_values,
            max_length=64,
            num_beams=4,  # Beam search to improve precision
            early_stopping=True,
        )
        generated_text = processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0].strip()

        # Extract full decimal weight like 52.75 or 18.89. Only append the
        # unit to a real reading, so a miss does not read "Not detected kg".
        weight = _extract_weight(generated_text)
        reading = f"{weight} kg" if weight is not None else "Not detected"

        # Timestamp in IST
        current_time = datetime.now(_IST).strftime("%Y-%m-%d %H:%M:%S")

        return f"Weight: {reading}\nCaptured At: {current_time} (IST)", image
    except Exception as e:
        # UI boundary: surface any failure as text rather than crashing the
        # request handler.
        return f"Error: {str(e)}", image


# Gradio UI
interface = gr.Interface(
    fn=detect_weight,
    inputs=gr.Image(type="pil", label="Upload or Capture Image"),
    outputs=[gr.Textbox(label="Weight Info"), gr.Image(label="Snapshot")],
    title="⚖️ Auto Weight Detector (Decimal Accurate)",
    description=(
        "Detects full weight including decimals (e.g., 52.75 kg) from "
        "digital scale image using Hugging Face OCR."
    ),
)

interface.launch()