File size: 3,738 Bytes
b78b0dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from peekingduck.pipeline.nodes.model import yolo as pkd_yolo
import cv2
from collections import defaultdict
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

def convert_labels(labels_dict, bbox_labels):
    """Map string class labels to their numeric class ids.

    Args:
        labels_dict (dict): mapping from label name (str) to numeric class id.
        bbox_labels (np.ndarray): 1-D array of string class labels,
            e.g. array(['person', 'bicycle', ...]).

    Returns:
        np.ndarray: new array with each label replaced by labels_dict[label];
        labels absent from the mapping are passed through unchanged.
    """
    # Per-element lookup instead of repeated in-place boolean masking. This:
    # - no longer mutates the caller's array,
    # - avoids NumPy's string-vs-int elementwise-comparison FutureWarning
    #   (the one the module-level warnings filter was papering over),
    # - cannot chain-replace when an already-mapped value equals a later key.
    return np.array([labels_dict.get(label, label) for label in bbox_labels])

def run_inference(img_matrix, model, labels_dict=None):
    """Run single-image inference and pack bboxes, scores and class ids together.

    Args:
        img_matrix (np.ndarray): image as an H x W x C array.
        model: inference object exposing ``run({"img": ...}) -> dict`` with keys
            "bboxes", "bbox_scores" and "bbox_labels" (PeekingDuck node style).
        labels_dict (dict, optional): mapping of label name to numeric class id.
            Defaults to {'person': 1, 'bicycle': 2}.

    Returns:
        tuple:
            np.ndarray: n x 6 float32 array of (x1, y1, x2, y2, score, class).
            tuple: the input image's shape (H, W, C).
    """
    # Avoid a mutable default argument; resolve the documented default here.
    if labels_dict is None:
        labels_dict = {'person': 1, 'bicycle': 2}

    inference_inputs = {"img": img_matrix}

    # Modify this call to match your model's inference entry point,
    # e.g. model(img) for a raw PyTorch module.
    inference_outputs = model.run(inference_inputs)

    # Numericize string labels so the final astype(float32) succeeds.
    bbox_labels = convert_labels(labels_dict, inference_outputs["bbox_labels"])
    bboxes = inference_outputs["bboxes"]
    bbox_scores = inference_outputs["bbox_scores"]

    # Pair each score with its class id: two (n,) vectors -> one (n, 2) array.
    stacked = np.stack((bbox_scores, bbox_labels), axis=1)

    # Append the (score, class) columns to the (n, 4) bbox coords -> (n, 6).
    concated = np.concatenate((bboxes, stacked), axis=1)

    return concated.astype(np.float32), img_matrix.shape


class Inference:
    """Wraps a detection model and runs per-image inference from a path or raw bytes."""

    def __init__(self, model, cfg_obj):
        self.model = model
        # Label-name -> class-id mapping used to numericize model outputs.
        self.labels_dict = cfg_obj['error_analysis']['labels_dict']

    def run_inference_path(self, img_path):
        """Run inference on an image loaded from disk.

        Args:
            img_path (str): path to the image file.

        Returns:
            tuple: (n x 6 float32 detections array, image shape),
            as produced by run_inference.

        Raises:
            FileNotFoundError: if the image cannot be read.
        """
        image_orig = cv2.imread(img_path)
        # cv2.imread returns None (no exception) for a missing/unreadable
        # file; fail loudly here instead of crashing opaquely in cvtColor.
        if image_orig is None:
            raise FileNotFoundError(f"Could not read image at: {img_path}")
        image_orig = cv2.cvtColor(image_orig, cv2.COLOR_BGR2RGB)

        return run_inference(image_orig, self.model, labels_dict=self.labels_dict)

    def run_inference_byte(self, img_bytes):
        """Run inference on an in-memory encoded image (e.g. an uploaded file).

        Args:
            img_bytes (bytes): encoded image bytes (JPEG/PNG/...).

        Returns:
            tuple: (n x 6 float32 detections array, image shape),
            as produced by run_inference.

        Raises:
            ValueError: if the bytes cannot be decoded into an image.
        """
        img_decoded = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), -1)
        # cv2.imdecode also signals failure by returning None.
        if img_decoded is None:
            raise ValueError("Could not decode image bytes")
        img_decoded = cv2.cvtColor(img_decoded, cv2.COLOR_BGR2RGB)

        return run_inference(img_decoded, self.model, labels_dict=self.labels_dict)

if __name__ == "__main__":
    import yaml

    # Bug fix: `cfg_path` was previously undefined (NameError on launch).
    cfg_path = "./cfg/cfg.yml"  # TODO(review): confirm the actual config location
    # Context manager closes the config file (it was previously leaked).
    with open(cfg_path) as cfg_file:
        cfg_obj = yaml.load(cfg_file, Loader=yaml.FullLoader)

    img_path = "./data/annotations_trainval2017/coco_person/000000000139.jpg"
    # Bug fix: Inference.__init__ requires cfg_obj; it was not passed before
    # (only the model keyword was supplied), raising a TypeError.
    inference_obj = Inference(
        model=pkd_yolo.Node(model_type="v4tiny", detect=["Person"], cfg_obj=cfg_obj),
        cfg_obj=cfg_obj,
    )
    print(inference_obj.run_inference_path(img_path))