Spaces:

mrneuralnet
/

tech-assessment-kecilin

Sleeping

File size: 6,220 Bytes

4142b1b

from PIL import Image
from ultralytics import YOLO



from utils import readb64, img2base64

# Detector instance shared by the inference_on_image / inference_on_video
# helpers below; it is created once, when this module is loaded.
model_int8 = YOLO(
    model='weights/best.torchscript',
    task='detect',
)


def inference_on_image(path):
    """Run the shared detector on one image file and show the annotated image.

    Every detected box is drawn on the image together with a filled caption
    strip holding the class label and the confidence. The window stays open
    until a key is pressed and is then closed.

    Note: class names are looked up in the module-level ``labels`` mapping,
    which this file only defines inside its ``__main__`` section.

    Args:
        path: Path of the image file to run detection on.

    Returns:
        The results object returned by ``model_int8(path)``.

    Raises:
        OSError: If OpenCV cannot read an image from ``path``.
    """
    # Imported locally so the function also works when this module is
    # imported; at file level cv2 is only imported under the __main__ guard.
    import cv2

    results = model_int8(path)

    # The second argument of cv2.imread is an IMREAD_* flag, not a colour
    # conversion code, so rely on the default flag (3-channel BGR).
    img = cv2.imread(path)
    if img is None:
        raise OSError("Could not read image: {}".format(path))

    box_color = (0, 102, 255)
    for box in results[0].boxes:
        cls = box.cls.item()
        confidence = box.conf.item()
        label = labels[int(cls)]

        # tolist() works regardless of the device the tensor lives on,
        # unlike numpy(), which requires a CPU tensor.
        x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
        cv2.rectangle(img, (x1, y1), (x2, y2), box_color, 2)
        # Filled strip above the box acts as the caption background.
        cv2.rectangle(img, (x1, y1 - 20), (x2, y1), box_color, -1)
        cv2.putText(img, "{}: {:.3f}".format(label, confidence), (x1, y1 - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 1)

    cv2.imshow('Detected Image', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    return results

def inference_on_video(path, vid_stride=10):
    """Play a video while overlaying detections from the shared detector.

    The detector is run in streaming mode on every ``vid_stride``-th frame.
    Each displayed frame is annotated with the most recent detection result,
    so boxes persist between detector updates. Playback stops at the end of
    the video or when Esc is pressed.

    Note: class names are looked up in the module-level ``labels`` mapping,
    which this file only defines inside its ``__main__`` section.

    Args:
        path: Path of the video file.
        vid_stride: Frame stride passed to the detector; also the interval
            (in displayed frames) at which the next result is fetched.

    Returns:
        The streaming results iterator returned by the model call. Results
        already fetched for display have been consumed from it.
    """
    # Imported locally so the function also works when this module is
    # imported; at file level cv2 is only imported under the __main__ guard.
    import cv2

    results = model_int8(path, vid_stride=vid_stride, stream=True)

    cap = cv2.VideoCapture(path)
    box_color = (0, 102, 255)
    result = None
    frame_counter = 0
    try:
        while True:
            ret, img = cap.read()
            if not ret:
                break

            if frame_counter % vid_stride == 0:
                # Keep the previous result if the detector stream has run
                # dry instead of letting StopIteration escape.
                result = next(results, result)

            if result is not None:
                for box in result.boxes:
                    cls = box.cls.item()
                    confidence = box.conf.item()
                    label = labels[int(cls)]

                    # tolist() works regardless of the tensor's device.
                    x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
                    cv2.rectangle(img, (x1, y1), (x2, y2), box_color, 2)
                    # Filled strip above the box is the caption background.
                    cv2.rectangle(img, (x1, y1 - 20), (x2, y1), box_color, -1)
                    cv2.putText(img, "{}: {:.3f}".format(label, confidence),
                                (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                                (255, 255, 255), 1)

            cv2.imshow('Detected Image', img)
            frame_counter += 1

            # 27 is the key code of Esc.
            if cv2.waitKey(5) & 0xFF == 27:
                break
    finally:
        # Release the capture and close the window on every exit path.
        cap.release()
        cv2.destroyAllWindows()

    return results


class ImagePipeline:
    """Single-image detection pipeline: decode, run the model, build a dict.

    Calling an instance runs ``preprocess`` -> ``inference`` ->
    ``get_response`` and returns the response dictionary.
    """

    def __init__(self, device='cpu', gpu_id=0, weights='weights/best.torchscript'):
        """Load the detection model from ``weights``.

        ``device`` and ``gpu_id`` are accepted but not used by this class.
        """
        self.model = YOLO(weights, task='detect')

    def preprocess(self, data):
        """Decode the image carried by ``data`` with ``readb64``.

        Args:
            data: Mapping whose ``"images"`` entry is one encoded image or a
                list holding exactly one (presumably base64 strings, given
                the ``readb64`` helper - confirm against ``utils``). The
                ``"images"`` key is removed from ``data``. When the key is
                absent, ``data`` itself is treated as the image.

        Returns:
            A one-element list with the decoded image.

        Raises:
            ValueError: If the list is empty or holds more than one image.
        """
        image_base64 = data.pop("images", data)

        if not isinstance(image_base64, list):
            image_base64 = [image_base64]
        elif len(image_base64) > 1:
            raise ValueError("ImagePipeline only accepts 1 image/frame")
        elif not image_base64:
            # Fail here with a clear message rather than with an IndexError
            # later in inference().
            raise ValueError("ImagePipeline requires 1 image/frame, got none")

        return [readb64(image) for image in image_base64]

    def inference(self, images):
        """Run the model on the first (only) image and return its results."""
        return self.model(images[0])

    def get_response(self, inference_result):
        """Convert model results into a JSON-friendly dictionary.

        Args:
            inference_result: Sequence of result objects exposing
                ``boxes.xywhn``, ``boxes.cls`` and ``boxes.conf``.

        Returns:
            ``{'results': [...], 'message': str}`` where each entry holds
            the ``xywhn`` box, the class id and the confidence. The message
            reports whether class id 0 occurs in the first result.
        """
        # tolist() is used instead of numpy() so tensors on any device work.
        found = (
            bool(inference_result)
            and 0 in inference_result[0].boxes.cls.tolist()
        )
        # Message strings (including the trailing space) are kept unchanged.
        message = "An ambulance is found " if found else "There is no ambulance"

        response = []
        for result in inference_result:
            boxes = result.boxes
            for xywhn, cls, conf in zip(
                boxes.xywhn.tolist(),
                boxes.cls.tolist(),
                boxes.conf.tolist(),
            ):
                response.append({
                    'xywhn': {
                        'x': float(xywhn[0]),
                        'y': float(xywhn[1]),
                        'w': float(xywhn[2]),
                        'h': float(xywhn[3]),
                    },
                    'class': cls,
                    'confidence': conf,
                })

        return {'results': response,
                'message': message}

    def __call__(self, data, config_payload=None):
        """Run the full pipeline on ``data``; ``config_payload`` is unused."""
        images = self.preprocess(data)
        inference_result = self.inference(images)
        return self.get_response(inference_result)

class VideoPipeline:
    """Video detection pipeline: run the model over a video, build a dict.

    Calling an instance runs ``preprocess`` -> ``inference`` ->
    ``get_response`` and returns the response dictionary.
    """

    def __init__(self, device='cpu', gpu_id=0, weights='weights/best.torchscript'):
        """Load the detection model from ``weights``.

        ``device`` and ``gpu_id`` are accepted but not used by this class.
        """
        self.model = YOLO(weights, task='detect')

    def preprocess(self, data):
        """Return ``data`` unchanged; it is passed straight to ``inference``."""
        return data

    def inference(self, video_path, vid_stride=30):
        """Run the model on ``video_path`` with the given frame stride."""
        return self.model(video_path, vid_stride=vid_stride)

    def get_response(self, inference_result):
        """Convert per-frame model results into a JSON-friendly dictionary.

        Args:
            inference_result: Iterable of result objects exposing
                ``boxes.xywhn``, ``boxes.cls`` and ``boxes.conf``.

        Returns:
            ``{'results': [...], 'message': str}`` where the list holds one
            entry per detected box across all results (``xywhn`` box, class
            id, confidence). The message reports whether class id 0 occurs
            in any result.
        """
        response = []

        # default message
        message = "There is no ambulance"

        for result in inference_result:
            boxes = result.boxes
            # tolist() is used instead of numpy() so tensors on any device
            # work.
            class_ids = boxes.cls.tolist()

            if 0 in class_ids:
                message = "An ambulance is found"

            for xywhn, cls, conf in zip(
                boxes.xywhn.tolist(),
                class_ids,
                boxes.conf.tolist(),
            ):
                response.append({
                    'xywhn': {
                        'x': float(xywhn[0]),
                        'y': float(xywhn[1]),
                        'w': float(xywhn[2]),
                        'h': float(xywhn[3]),
                    },
                    'class': cls,
                    'confidence': conf,
                })

        return {'results': response,
                'message': message}

    def __call__(self, data, config_payload=None):
        """Run the full pipeline on ``data``; ``config_payload`` is unused."""
        data = self.preprocess(data)
        inference_result = self.inference(data)
        return self.get_response(inference_result)


if __name__ == '__main__':
    # Command-line entry point: run detection on one image or video file and
    # display the annotated output.
    import cv2
    import argparse

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        '--input_type',
        default='image',
        const='image',
        nargs='?',
        choices=['image', 'video'],
        help='type of input (default: %(default)s)',
    )
    # The path is required: without it the helpers would receive None.
    parser.add_argument("-p", "--path", required=True, help="filepath")
    args = parser.parse_args()

    # Defined at module scope because the inference_on_* helpers look up
    # ``labels`` as a global to turn class ids into names.
    labels = {
        0: 'ambulance',
        1: 'truck',
    }

    if args.input_type == 'image':
        results = inference_on_image(args.path)
    elif args.input_type == 'video':
        results = inference_on_video(args.path)

    print(results)

    # Examples
    # python pipelines.py --input_type image --path sample_files/ambulance-2.jpeg
    # python pipelines.py --input_type video --path sample_files/ambulance.mp4