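"""Triton gRPC client for YOLOv8 helmet detection on video.

Reads a video file, runs each frame through a YOLOv8 ensemble served by
NVIDIA Triton Inference Server, draws the detected bounding boxes, and
writes the annotated result to an AVI file while showing a live preview.
"""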
import numpy as np
import cv2
import tritonclient.grpc as grpcclient
import sys
import argparse
import os

# Class names for the dataset
class_names = [
    'helm',
    'no_helm',
    'person',
]


def get_triton_client(url: str = 'localhost:8001'):
    keepalive_options = grpcclient.KeepAliveOptions(
        keepalive_time_ms=2**31 - 1,
        keepalive_timeout_ms=20000,
        keepalive_permit_without_calls=False,
        http2_max_pings_without_data=2
    )
    return grpcclient.InferenceServerClient(url=url,
                                            verbose=False,
                                            keepalive_options=keepalive_options)


def draw_bounding_box(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    label = f'{class_names[class_id]}: {confidence:.2f}'
    color = (255, 0, 0)
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
    cv2.putText(img, label, (x - 10, y - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)


def process_frame(frame, expected_image_shape, model_name, triton_client):
    original_image, input_image, scale = preprocess_frame(frame, expected_image_shape)
    num_detections, detection_boxes, detection_scores, detection_classes = run_inference(
        model_name, input_image, triton_client)

    # Boxes come back as [x, y, w, h] in letterboxed model-input coordinates;
    # scale them back to the original frame. Cast counts and class ids to int
    # in case the server returns them as floats.
    for index in range(int(num_detections[0])):
        box = detection_boxes[index]
        draw_bounding_box(original_image,
                          int(detection_classes[index]),
                          detection_scores[index],
                          round(box[0] * scale),
                          round(box[1] * scale),
                          round((box[0] + box[2]) * scale),
                          round((box[1] + box[3]) * scale))
    return original_image


def preprocess_frame(frame, expected_image_shape):
    # Letterbox the frame onto a square canvas, then resize to the model's
    # expected input size. A square input (e.g. 640x640 for YOLOv8) is
    # assumed, so the height/width order of the metadata shape is immaterial.
    expected_width, expected_height = expected_image_shape
    height, width, _ = frame.shape
    length = max(height, width)
    image = np.zeros((length, length, 3), np.uint8)
    image[0:height, 0:width] = frame
    scale = length / min(expected_image_shape)
    input_image = cv2.resize(image, (expected_width, expected_height))
    input_image = (input_image / 255.0).astype(np.float32)
    input_image = input_image.transpose(2, 0, 1)  # Channel first
    input_image = np.expand_dims(input_image, axis=0)
    return frame, input_image, scale


def run_inference(model_name: str, input_image: np.ndarray,
                  triton_client: grpcclient.InferenceServerClient):
    inputs = [grpcclient.InferInput('images', input_image.shape, "FP32")]
    inputs[0].set_data_from_numpy(input_image)
    outputs = [
        grpcclient.InferRequestedOutput('num_detections'),
        grpcclient.InferRequestedOutput('detection_boxes'),
        grpcclient.InferRequestedOutput('detection_scores'),
        grpcclient.InferRequestedOutput('detection_classes')
    ]
    results = triton_client.infer(model_name=model_name, inputs=inputs, outputs=outputs)
    num_detections = results.as_numpy('num_detections')
    detection_boxes = results.as_numpy('detection_boxes')
    detection_scores = results.as_numpy('detection_scores')
    detection_classes = results.as_numpy('detection_classes')
    return num_detections, detection_boxes, detection_scores, detection_classes
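
# Optional pre-flight check (a minimal sketch, not part of the original flow):
# tritonclient's InferenceServerClient exposes is_server_live() and
# is_model_ready(), which can be used to fail fast before streaming frames.
# If desired, call check_server(triton_client, model_name) at the top of
# main(), right after get_triton_client(url).
def check_server(triton_client, model_name):
    # Abort early if the server is unreachable or the model is not loaded.
    if not triton_client.is_server_live():
        print("Error: Triton server is not live.")
        sys.exit()
    if not triton_client.is_model_ready(model_name):
        print(f"Error: model '{model_name}' is not ready.")
        sys.exit()
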
def main(video_path, model_name, url):
    triton_client = get_triton_client(url)
    # The model metadata input shape is assumed to be [batch, 3, H, W];
    # take the trailing [H, W] as the expected input size.
    expected_image_shape = triton_client.get_model_metadata(model_name).inputs[0].shape[-2:]

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        sys.exit()

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    output_path = os.path.splitext(video_path)[0] + "_output.avi"
    # Preserve the source frame rate and size; fall back to 20 fps when the
    # container does not report one.
    fps = cap.get(cv2.CAP_PROP_FPS) or 20.0
    frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Process each frame
        output_frame = process_frame(frame, expected_image_shape, model_name, triton_client)
        # Write processed frame to the output video
        out.write(output_frame)
        # Display the frame with bounding boxes
        cv2.imshow('Video', output_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    out.release()
    cv2.destroyAllWindows()
    print(f"Output saved as {output_path}")


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--video_path', type=str, default='./assets/helmet.mp4')
    parser.add_argument('--model_name', type=str, default='yolov8_ensemble')
    parser.add_argument('--url', type=str, default='localhost:8001')
    args = parser.parse_args()
    main(args.video_path, args.model_name, args.url)
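
# Example invocation (assumes a Triton server exposing its gRPC endpoint and
# serving the 'yolov8_ensemble' model; 'client.py' is a placeholder for
# whatever this script is saved as):
#   python client.py --video_path ./assets/helmet.mp4 \
#       --model_name yolov8_ensemble --url localhost:8001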