import cv2
import numpy as np
import gradio as gr  # type: ignore
from mbnet import load_model, detect_objects, get_box_dimensions, draw_labels, load_img
from yolov3 import load_image, load_yolo, detect_objects_yolo, get_box_dimensions_yolo, draw_labels_yolo
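# `mbnet` and `yolov3` are local helper modules. As used below: the loaders
# return the network plus class names and box colors (YOLO also returns its
# output layer names), detect_objects*() run a forward pass,
# get_box_dimensions*() decode the raw outputs into boxes, and
# draw_labels*() return the annotated image.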


# Image Inference

def img_inf(img, model_type):
    """Run object detection on a single image and return the annotated RGB image."""
    if model_type == "MobileNet-SSD":
        model, classes, colors = load_model()
        image, height, width, channels = load_img(img)
        blob, outputs = detect_objects(image, model)
        boxes, class_ids = get_box_dimensions(outputs, height, width)
        image = draw_labels(boxes, colors, class_ids, classes, image)
    else:
        model, classes, colors, output_layers = load_yolo()
        image, height, width, channels = load_image(img)
        blob, outputs = detect_objects_yolo(image, model, output_layers)
        boxes, confs, class_ids = get_box_dimensions_yolo(outputs, height, width)
        image = draw_labels_yolo(boxes, confs, colors, class_ids, classes, image)
    # OpenCV returns BGR; convert to RGB for display in Gradio.
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)


model_name = gr.Radio(["MobileNet-SSD", "YOLOv3"], value="YOLOv3", label="Model", info="Choose your model")
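# type="filepath" passes the uploaded image to img_inf as a path string,
# which load_img/load_image then read from disk.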
inputs_image = gr.Image(type="filepath", label="Input Image")
outputs_image = gr.Image(type="numpy", label="Output Image")
interface_image = gr.Interface(
    fn=img_inf,
    inputs=[inputs_image, model_name],
    outputs=outputs_image,
    title="Image Inference",
    description="Upload a photo, select a model, and see the results!",
    # Each example must supply a value for every input component (image, model).
    examples=[["sample/dog.jpg", "YOLOv3"]],
    cache_examples=False,
)


# Video Inference

def vid_inf(vid, model_type):
    """Run detection frame by frame, streaming annotated frames to the UI and
    yielding the path of the recorded output video once the stream ends."""
    cap = cv2.VideoCapture(vid)
    # Get the video frames' width, height, and FPS for proper saving of the video.
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))
    frame_size = (frame_width, frame_height)
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    output_video = "output_recorded.mp4"

    # Create the `VideoWriter()` object.
    out = cv2.VideoWriter(output_video, fourcc, fps, frame_size)

    # Load the selected model once, outside the frame loop.
    if model_type == "MobileNet-SSD":
        model, classes, colors = load_model()
    else:
        model, classes, colors, output_layers = load_yolo()

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        height, width, channels = frame.shape
        if model_type == "MobileNet-SSD":
            blob, outputs = detect_objects(frame, model)
            boxes, class_ids = get_box_dimensions(outputs, height, width)
            frame = draw_labels(boxes, colors, class_ids, classes, frame)
        else:
            blob, outputs = detect_objects_yolo(frame, model, output_layers)
            boxes, confs, class_ids = get_box_dimensions_yolo(outputs, height, width)
            frame = draw_labels_yolo(boxes, confs, colors, class_ids, classes, frame)
        out.write(frame)
        # Stream the current annotated frame; the recorded file is yielded last.
        yield cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), None

    cap.release()
    out.release()
    cv2.destroyAllWindows()
    yield None, output_video


# A separate Radio instance for the video tab (the image tab already uses the one above).
model_name = gr.Radio(["MobileNet-SSD", "YOLOv3"], value="YOLOv3", label="Model", info="Choose your model")
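# sources=None keeps Gradio's default video sources (e.g. file upload) enabled.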
input_video = gr.Video(sources=None, label="Input Video")
output_frame = gr.Image(type="numpy", label="Output Frames")
output_video_file = gr.Video(label="Output Video")


interface_video = gr.Interface(
    fn=vid_inf,
    inputs=[input_video, model_name],
    outputs=[output_frame, output_video_file],
    title="Video Inference",
    description="Upload a video, select a model, and see the results!",
    examples=[["sample/video_1.mp4", "YOLOv3"], ["sample/person.mp4", "YOLOv3"]],
    cache_examples=False,
)

# queue() enables the request queue, which generator functions like vid_inf
# need in order to stream frames.
gr.TabbedInterface(
    [interface_image, interface_video],
    tab_names=["Image", "Video"],
    title="GradioxOpenCV-DNN",
).queue().launch()