import gradio as gr
from PIL import Image, ImageEnhance, ImageOps
import torch
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from datetime import datetime
import pytz
import re

# Load the TrOCR processor and model once at import time; both come from the
# same checkpoint, so the name is kept in a single place.
TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"
processor = TrOCRProcessor.from_pretrained(TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(TROCR_CHECKPOINT)

# Preprocess image to enhance OCR accuracy
def enhance_image(image):
    """Return a preprocessed RGB copy of ``image`` intended for OCR.

    The steps, in order: convert to 8-bit grayscale, invert, multiply
    contrast by 2.5, multiply sharpness by 3.0, upscale each side by 3x,
    and convert back to RGB.

    Args:
        image: A PIL image.

    Returns:
        A new RGB PIL image three times the width and height of the input.
    """
    # Grayscale first, then flip light and dark.
    inverted = ImageOps.invert(image.convert("L"))

    # Exaggerate contrast and edge sharpness.
    contrasted = ImageEnhance.Contrast(inverted).enhance(2.5)
    sharpened = ImageEnhance.Sharpness(contrasted).enhance(3.0)

    # Upscale 3x per side, then hand back an RGB image for the model.
    target_size = (sharpened.width * 3, sharpened.height * 3)
    return sharpened.resize(target_size).convert("RGB")

# Extract accurate decimal weight
def detect_weight(image):
    """Run OCR on a scale image and report the first number found as the weight.

    Args:
        image: The picture to read (anything ``enhance_image`` accepts), or
            ``None`` when no picture was supplied.

    Returns:
        A ``(message, image)`` tuple. On success ``message`` holds the weight
        line and an IST capture timestamp. When no number is recognised the
        weight line reads ``"Weight: Not detected"``. On failure ``message``
        starts with ``"Error:"``. ``image`` is always the input, unchanged.
    """
    # Guard the empty submission explicitly so the user sees an actionable
    # message instead of an internal AttributeError text.
    if image is None:
        return "Error: No image provided. Please upload or capture an image.", image

    try:
        processed_image = enhance_image(image)

        # OCR: image -> pixel tensor -> token ids -> text.
        pixel_values = processor(images=processed_image, return_tensors="pt").pixel_values
        generated_ids = model.generate(
            pixel_values,
            max_length=64,
            num_beams=4,  # beam search instead of greedy decoding
            early_stopping=True,
        )
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()

        # First number in the text, with an optional decimal part (e.g. 52.75).
        # NOTE(review): digit runs longer than 4 are truncated by this pattern
        # (e.g. "12345" yields "1234") - confirm that cap is intended.
        match = re.search(r"(\d{1,4}(?:\.\d{1,4})?)", generated_text)
        # Attach the unit only to a real reading; "Not detected kg" is misleading.
        weight_line = f"Weight: {match.group(1)} kg" if match else "Weight: Not detected"

        # Timestamp in IST
        ist = pytz.timezone('Asia/Kolkata')
        current_time = datetime.now(ist).strftime("%Y-%m-%d %H:%M:%S")

        return f"{weight_line}\nCaptured At: {current_time} (IST)", image
    except Exception as e:
        # UI boundary: report the failure in the text output rather than raising.
        return f"Error: {e}", image

# Gradio UI: one image input; a text box and an image as outputs.
image_input = gr.Image(type="pil", label="Upload or Capture Image")
result_outputs = [
    gr.Textbox(label="Weight Info"),
    gr.Image(label="Snapshot"),
]

interface = gr.Interface(
    fn=detect_weight,
    inputs=image_input,
    outputs=result_outputs,
    title="⚖️ Auto Weight Detector (Decimal Accurate)",
    description="Detects full weight including decimals (e.g., 52.75 kg) from digital scale image using Hugging Face OCR.",
)

# Start the web server for the app.
interface.launch()