import json

import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image
from ultralytics import YOLO

model_path = "best.pt"
model = YOLO(model_path)

# Class names a complete submission is expected to contain.
EXPECTED_CLASSES = frozenset({"front", "back"})
# Minimum confidence handed to the detector.
CONFIDENCE_THRESHOLD = 0.85
# Width, in pixels, that images are resized to before detection.
TARGET_WIDTH = 800


def preprocess_image(image, target_width=TARGET_WIDTH):
    """Apply enhancement filters and resize the image before detection.

    The image is darkened, blurred, sharpened and then resized to
    ``target_width`` while keeping its aspect ratio.

    Args:
        image: Anything ``np.array`` can turn into an image array
            (for example a PIL image or an existing array).
        target_width: Width of the returned image in pixels.

    Returns:
        The processed image as a NumPy array.

    Raises:
        ValueError: If the input does not contain any pixels.
    """
    image = np.array(image)
    if image.ndim < 2 or image.shape[0] == 0 or image.shape[1] == 0:
        raise ValueError("preprocess_image() requires a non-empty image")

    image = cv2.convertScaleAbs(image, alpha=0.8, beta=0)  # Brightness reduction
    image = cv2.GaussianBlur(image, (3, 3), 0)  # Denoising
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])  # Sharpening
    image = cv2.filter2D(image, -1, kernel)

    height, width = image.shape[:2]
    # int() truncates, so a very wide and short image could otherwise end up
    # with a height of 0, which is not a valid resize target.
    new_height = max(1, int((target_width / width) * height))
    return cv2.resize(image, (target_width, new_height))


def imageRotation(image):
    """Placeholder for a future rotation step; returns the image unchanged."""
    return image


def vision_ai_api(image, label):
    """Simulate a Vision AI API call.

    Args:
        image: The cropped image that would be sent; currently unused.
        label: Class name of the crop; echoed back in the response.

    Returns:
        A dict with the given ``label`` and hard-coded ``extracted_data``.
    """
    return {
        "label": label,
        "extracted_data": {
            "name": "John Doe",
            "dob": "01-01-1990",
            "id_number": "1234567890"
        }
    }


def _best_detections(results, image_shape, labels):
    """Return the highest-confidence usable box for every detected class.

    Every usable detection is also appended to ``labels`` as
    ``"<class> <confidence>"``.

    Args:
        results: Iterable of detector results, each exposing ``boxes``.
        image_shape: Shape of the image the boxes refer to.
        labels: List that receives one label string per usable detection.

    Returns:
        Dict mapping class name to ``(confidence, (x1, y1, x2, y2))``.
    """
    img_h, img_w = image_shape[:2]
    best = {}
    for result in results:
        for box in result.boxes:
            conf = float(box.conf[0])
            class_name = model.names[int(box.cls[0])]
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())

            # Clamp to the image: a negative index would wrap around when
            # slicing, and an empty slice cannot be turned into a PIL image.
            x1, x2 = max(0, x1), min(img_w, x2)
            y1, y2 = max(0, y1), min(img_h, y2)
            if x2 <= x1 or y2 <= y1:
                continue

            labels.append(f"{class_name} {conf:.2f}")
            if class_name not in best or conf > best[class_name][0]:
                best[class_name] = (conf, (x1, y1, x2, y2))
    return best


def predict(image):
    """Detect the front and back of a card, crop them and query the Vision API.

    When a class is detected more than once, only its highest-confidence box
    is cropped and sent to the API.

    Args:
        image: The input image, or ``None`` when no image was supplied.

    Returns:
        A tuple ``(front_image, front_response, back_image, back_response,
        labels)``. The images are PIL images or ``None`` when that side was
        not detected, the responses are JSON strings (``"{}"`` when that
        side was not detected) and ``labels`` is a list of strings describing
        the detections and any missing classes.
    """
    if image is None:
        return None, "{}", None, "{}", ["No image provided"]

    image = preprocess_image(image)  # Apply preprocessing
    results = model(image, conf=CONFIDENCE_THRESHOLD)

    labels = []
    best = _best_detections(results, image.shape, labels)

    # Call Vision AI API separately for front & back, once per class.
    cropped_images = {}
    for class_name, (_, (x1, y1, x2, y2)) in best.items():
        cropped_pil = Image.fromarray(image[y1:y2, x1:x2])
        api_response = vision_ai_api(cropped_pil, class_name)
        cropped_images[class_name] = {
            "image": cropped_pil,
            "api_response": json.dumps(api_response, indent=4)
        }

    # Identify missing classes; sorted so the message is deterministic.
    missing_classes = EXPECTED_CLASSES.difference(best)
    if missing_classes:
        labels.append(f"Missing: {', '.join(sorted(missing_classes))}")

    # Prepare Gradio outputs (separate front & back images and responses)
    front = cropped_images.get("front", {})
    back = cropped_images.get("back", {})
    return (
        front.get("image"),
        front.get("api_response", "{}"),
        back.get("image"),
        back.get("api_response", "{}"),
        labels,
    )


# Gradio Interface
iface = gr.Interface(
    fn=predict,
    inputs="image",
    outputs=["image", "text", "image", "text", "text"],
    title="License Field Detection (Front & Back Card)",
    description="Detect front & back of a license card, crop the images, and call Vision AI API separately for each."
)

iface.launch()