import gradio as gr
import cv2
import requests
import os
from ultralytics import YOLO

# Box/label colors keyed by class id.
# NOTE: these tuples are drawn with OpenCV onto BGR frames, so each one is
# (blue, green, red) -- not RGB. The color names below describe what is
# actually rendered on screen.
colors = {
    0: (255, 0, 0),    # Blue for class 0
    1: (0, 128, 0),    # Green (dark) for class 1
    2: (0, 0, 255),    # Red for class 2
    3: (255, 255, 0),  # Cyan for class 3
    4: (255, 0, 255),  # Magenta for class 4
    5: (0, 255, 255),  # Yellow for class 5
    6: (128, 0, 0),    # Navy (dark blue) for class 6
    7: (0, 225, 0),    # Green (bright) for class 7
}

# Load the YOLO model once, at import time; show_preds_image and
# show_preds_video both use this shared instance. 'modelbest.pt' is a
# relative path to the weights file.
model = YOLO('modelbest.pt')

def show_preds_image(image_path):
    """Run the YOLO model on one image and return it with detections drawn.

    Each detected box is outlined in its class color and the class name is
    written centered inside the box.

    Args:
        image_path: Filesystem path of the image to analyse.

    Returns:
        The annotated image as an RGB array (the BGR image OpenCV works on
        is converted just before returning).

    Raises:
        gr.Error: If no image was supplied or OpenCV cannot decode the file.
    """
    if not image_path:
        raise gr.Error("Please provide an input image.")

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise gr.Error("The input image could not be read.")

    # Predict on the already-decoded array (as show_preds_video does) so the
    # file is decoded only once and boxes refer to exactly these pixels.
    outputs = model.predict(source=image)
    boxes = outputs[0].cpu().numpy().boxes

    font = cv2.FONT_HERSHEY_SIMPLEX
    for det, cls in zip(boxes.xyxy, boxes.cls):
        class_id = int(cls)
        label = model.names[class_id]

        # Bounding box corners as integer pixel coordinates.
        x1, y1, x2, y2 = (int(v) for v in det[:4])

        # Classes without an entry in `colors` fall back to (0, 0, 255),
        # which is red on a BGR image.
        color = colors.get(class_id, (0, 0, 255))
        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2, cv2.LINE_AA)

        # Center the label text inside the box.
        (text_w, text_h), _ = cv2.getTextSize(label, font, 0.75, 2)
        text_x = x1 + (x2 - x1) // 2 - text_w // 2
        text_y = y1 + (y2 - y1) // 2 + text_h // 2

        cv2.putText(image, label, (text_x, text_y), font, 0.75, color, 2, cv2.LINE_AA)

    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Image tab: the input component hands show_preds_image a file path, and the
# output component displays the array it returns.
inputs_image = gr.Image(label="Input Image", type="filepath")
outputs_image = gr.Image(label="Output Image", type="numpy")

interface_image = gr.Interface(
    show_preds_image,
    inputs_image,
    outputs_image,
    title="Smoke Detection on Indian Roads",
)

def show_preds_video(video_path):
    """Run the YOLO model on every frame of a video and save the result.

    Each frame is annotated with the detected boxes and class names and
    written to 'output_video.mp4' using the 'mp4v' codec, at the source
    video's frame size and frame rate.

    Args:
        video_path: Filesystem path of the video to analyse.

    Returns:
        The path of the annotated video file, 'output_video.mp4'.

    Raises:
        gr.Error: If no video was supplied, the video cannot be opened, or
            the output file cannot be opened for writing.
    """
    if not video_path:
        raise gr.Error("Please provide an input video.")

    cap = cv2.VideoCapture(video_path)
    out = None
    try:
        if not cap.isOpened():
            raise gr.Error("The input video could not be opened.")

        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 changes
        # the playback speed. Fall back to 30 when the container reports no
        # usable rate (0 or NaN), which would otherwise yield a broken file.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter('output_video.mp4', fourcc, fps, (width, height))
        if not out.isOpened():
            raise gr.Error("The output video could not be created.")

        font = cv2.FONT_HERSHEY_SIMPLEX
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            outputs = model.predict(source=frame)
            boxes = outputs[0].cpu().numpy().boxes

            # Prediction is finished, so drawing directly on `frame` is safe
            # and avoids copying every frame.
            for det, cls in zip(boxes.xyxy, boxes.cls):
                class_id = int(cls)
                label = model.names[class_id]

                x1, y1, x2, y2 = (int(v) for v in det[:4])

                # Classes without an entry in `colors` fall back to
                # (0, 0, 255), which is red on a BGR frame.
                color = colors.get(class_id, (0, 0, 255))
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2, cv2.LINE_AA)

                # Center the label text inside the box.
                (text_w, text_h), _ = cv2.getTextSize(label, font, 0.75, 2)
                text_x = x1 + (x2 - x1) // 2 - text_w // 2
                text_y = y1 + (y2 - y1) // 2 + text_h // 2

                cv2.putText(frame, label, (text_x, text_y), font, 0.75, color, 2, cv2.LINE_AA)

            out.write(frame)
    finally:
        # Always release the capture and writer, even if inference fails
        # part-way through the video.
        cap.release()
        if out is not None:
            out.release()

    return 'output_video.mp4'

# Video tab: show_preds_video receives the uploaded video and returns the
# path of the annotated file for the output player.
inputs_video = gr.Video(label="Input Video", format="mp4")
outputs_video = gr.Video(label="Output Video")

interface_video = gr.Interface(
    show_preds_video,
    inputs_video,
    outputs_video,
    title="Smoke Detection on Indian Roads",
)

# Put the image and video interfaces on separate tabs, enable request
# queueing, and start the app.
demo = gr.TabbedInterface(
    [interface_image, interface_video],
    tab_names=['Image inference', 'Video inference'],
)
demo.queue().launch()