import gradio as gr
import torch
import cv2
import numpy as np
from PIL import Image
from ultralytics import YOLO
import json


# Path handed to the YOLO constructor; being relative, it is resolved against
# the process's current working directory.
model_path = "best.pt"
# Built once at import time and reused by every predict() call.
model = YOLO(model_path)

def preprocess_image(image, target_width=800):
    """Enhance an image and resize it to a fixed width before detection.

    The image is darkened slightly, smoothed with a 3x3 Gaussian blur,
    sharpened with a 3x3 kernel, and finally resized to ``target_width``
    pixels wide while keeping the aspect ratio.

    Args:
        image: Anything ``np.array`` can turn into an image array (for
            example a PIL image or a NumPy array).
        target_width: Width in pixels of the returned image. Defaults to 800.

    Returns:
        The processed image as a NumPy array.

    Raises:
        ValueError: If ``image`` is None or contains no pixels, or if
            ``target_width`` is not positive.
    """
    # Fail early with a clear message instead of an opaque OpenCV error.
    if image is None:
        raise ValueError("No image provided")
    if target_width <= 0:
        raise ValueError("target_width must be a positive integer")

    image = np.array(image)
    if image.ndim < 2 or image.size == 0:
        raise ValueError("Image is empty")

    image = cv2.convertScaleAbs(image, alpha=0.8, beta=0)  # Brightness reduction
    image = cv2.GaussianBlur(image, (3, 3), 0)  # Denoising
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])  # Sharpening
    image = cv2.filter2D(image, -1, kernel)

    height, width = image.shape[:2]
    # Truncation can yield 0 for very wide, short images, which cv2.resize
    # rejects; keep at least one row.
    new_height = max(1, int((target_width / width) * height))
    image = cv2.resize(image, (target_width, new_height))

    return image

def imageRotation(image):
    """Placeholder rotation step: hands ``image`` back untouched for now."""
    return image

def vision_ai_api(image, label):
    """Stand-in for the Vision AI call; no request is actually made.

    ``image`` is accepted but ignored. The returned dict echoes ``label``
    and carries a fixed, hard-coded ``extracted_data`` payload.
    """
    placeholder_fields = dict(
        name="John Doe",
        dob="01-01-1990",
        id_number="1234567890",
    )
    return dict(label=label, extracted_data=placeholder_fields)

def predict(image):
    """Detect the front/back of a card, crop each side and query the API stub.

    Args:
        image: Input image as delivered by the Gradio "image" input, or
            None when nothing was uploaded.

    Returns:
        A 5-tuple ``(front_image, front_response, back_image,
        back_response, labels)``:

        * ``front_image`` / ``back_image``: PIL crop of the most confident
          detection of that class, or None if the class was not found.
        * ``front_response`` / ``back_response``: JSON string returned by
          ``vision_ai_api`` for that crop, or ``"{}"`` if not found.
        * ``labels``: list of ``"<class> <confidence>"`` strings, one per
          usable detection, plus a ``"Missing: ..."`` entry when "front"
          or "back" was not detected.
    """
    if image is None:
        # Gradio passes None when the form is submitted without an image.
        return None, "{}", None, "{}", ["No image provided"]

    image = preprocess_image(image)  # Apply preprocessing

    # Ultralytics interprets NumPy input as BGR (OpenCV convention), whereas
    # Gradio delivers RGB, so swap channels for inference only. Crops are
    # still taken from the RGB array so they display correctly.
    if image.ndim == 3 and image.shape[2] == 3:
        model_input = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
    else:
        model_input = image
    results = model(model_input, conf=0.85)

    img_height, img_width = image.shape[:2]
    labels = []
    best_by_class = {}  # class name -> (confidence, (x1, y1, x2, y2))

    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = float(box.conf[0])
            class_name = model.names[int(box.cls[0])]

            # Clamp to the image bounds and ignore zero-area boxes, which
            # would otherwise produce an empty crop.
            x1, x2 = max(0, x1), min(img_width, x2)
            y1, y2 = max(0, y1), min(img_height, y2)
            if x2 <= x1 or y2 <= y1:
                continue

            labels.append(f"{class_name} {conf:.2f}")

            # Keep only the most confident detection per class instead of
            # letting a later, weaker detection overwrite it.
            current = best_by_class.get(class_name)
            if current is None or conf > current[0]:
                best_by_class[class_name] = (conf, (x1, y1, x2, y2))

    # Crop and call the Vision AI API once per class (front & back separately).
    cropped_images = {}
    for class_name, (_, (x1, y1, x2, y2)) in best_by_class.items():
        cropped_pil = Image.fromarray(image[y1:y2, x1:x2])
        api_response = vision_ai_api(cropped_pil, class_name)
        cropped_images[class_name] = {
            "image": cropped_pil,
            "api_response": json.dumps(api_response, indent=4),
        }

    # Identify missing classes (sorted so the message is deterministic).
    possible_classes = {"front", "back"}
    missing_classes = possible_classes - set(best_by_class)
    if missing_classes:
        labels.append(f"Missing: {', '.join(sorted(missing_classes))}")

    # Prepare Gradio outputs (separate front & back images and responses).
    front = cropped_images.get("front", {})
    back = cropped_images.get("back", {})

    return (
        front.get("image"),
        front.get("api_response", "{}"),
        back.get("image"),
        back.get("api_response", "{}"),
        labels,
    )

# Gradio interface: one uploaded image in; five outputs matching predict()'s
# return tuple (front crop, front JSON, back crop, back JSON, labels).
iface = gr.Interface(
    fn=predict,
    inputs="image",
    outputs=["image", "text", "image", "text", "text"],
    title="License Field Detection (Front & Back Card)",
    description="Detect front & back of a license card, crop the images, and call Vision AI API separately for each.",
)

# Start the server only when run as a script, so importing this module
# (e.g. from tests or another app) does not block on launch().
if __name__ == "__main__":
    iface.launch()