#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.

import sys
import tempfile
import time
from collections import ChainMap

from loguru import logger
from tqdm import tqdm

import numpy as np
import torch

from yolox.utils import gather, is_main_process, postprocess, synchronize, time_synchronized


class VOCEvaluator:
"""
VOC AP Evaluation class.
"""
    def __init__(self, dataloader, img_size, confthre, nmsthre, num_classes):
        """
        Args:
            dataloader (Dataloader): evaluate dataloader.
            img_size (tuple): image size (height, width) after preprocessing;
                input images are resized to this shape.
            confthre (float): confidence threshold ranging from 0 to 1,
                defined in the config file.
            nmsthre (float): IoU threshold of non-max suppression, ranging from 0 to 1.
            num_classes (int): number of object classes in the dataset.
        """
        self.dataloader = dataloader
        self.img_size = img_size
        self.confthre = confthre
        self.nmsthre = nmsthre
        self.num_classes = num_classes
        self.num_images = len(dataloader.dataset)

    def evaluate(
        self, model, distributed=False, half=False, trt_file=None,
        decoder=None, test_size=None, return_outputs=False,
    ):
        """
        VOC average precision (AP) evaluation. Iterates inference over the test
        dataset and evaluates the results with the VOC evaluation protocol of the
        underlying dataset.
        NOTE: This function switches the model to eval mode; save its training
        state beforehand if needed.

        Args:
            model: model to evaluate.

        Returns:
            mAP50 (float): VOC metric AP at IoU=0.5.
            mAP70 (float): VOC metric AP at IoU=0.7.
            summary (str): summary info of evaluation.
        """
        # TODO half to amp_test
        tensor_type = torch.cuda.HalfTensor if half else torch.cuda.FloatTensor
        model = model.eval()
        if half:
            model = model.half()
        ids = []
        data_list = {}
        progress_bar = tqdm if is_main_process() else iter

        inference_time = 0
        nms_time = 0
        # the last (possibly smaller) batch is excluded from the timing statistics
        n_samples = max(len(self.dataloader) - 1, 1)

        if trt_file is not None:
            from torch2trt import TRTModule

            model_trt = TRTModule()
            model_trt.load_state_dict(torch.load(trt_file))

            # run one dummy forward pass, then swap in the deserialized TensorRT module
            x = torch.ones(1, 3, test_size[0], test_size[1]).cuda()
            model(x)
            model = model_trt

        for cur_iter, (imgs, _, info_imgs, ids) in enumerate(progress_bar(self.dataloader)):
            with torch.no_grad():
                imgs = imgs.type(tensor_type)

                # skip timing for the last iteration, since its batch size may be smaller
                is_time_record = cur_iter < len(self.dataloader) - 1
                if is_time_record:
                    start = time.time()

                outputs = model(imgs)
                if decoder is not None:
                    outputs = decoder(outputs, dtype=outputs.type())
                if is_time_record:
                    infer_end = time_synchronized()
                    inference_time += infer_end - start

                outputs = postprocess(
                    outputs, self.num_classes, self.confthre, self.nmsthre
                )
                if is_time_record:
                    nms_end = time_synchronized()
                    nms_time += nms_end - infer_end

            data_list.update(self.convert_to_voc_format(outputs, info_imgs, ids))

        statistics = torch.cuda.FloatTensor([inference_time, nms_time, n_samples])
        if distributed:
            data_list = gather(data_list, dst=0)
            data_list = ChainMap(*data_list)
            torch.distributed.reduce(statistics, dst=0)

        eval_results = self.evaluate_prediction(data_list, statistics)
        synchronize()

        if return_outputs:
            return eval_results, data_list
        return eval_results

    def convert_to_voc_format(self, outputs, info_imgs, ids):
        predictions = {}
        for output, img_h, img_w, img_id in zip(outputs, info_imgs[0], info_imgs[1], ids):
            if output is None:
                predictions[int(img_id)] = (None, None, None)
                continue
            output = output.cpu()

            bboxes = output[:, 0:4]

            # undo the preprocessing resize: map boxes back to the original image scale
            scale = min(self.img_size[0] / float(img_h), self.img_size[1] / float(img_w))
            bboxes /= scale
            cls = output[:, 6]
            scores = output[:, 4] * output[:, 5]

            predictions[int(img_id)] = (bboxes, cls, scores)
        return predictions

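    # Worked example for the rescaling above (illustrative numbers, not from the
    # original code): with self.img_size == (640, 640) and an original image of
    # img_h=960, img_w=1280, the preprocessing scale is
    # min(640 / 960, 640 / 1280) = 0.5, so dividing the predicted boxes by 0.5
    # maps them from network-input coordinates back to the 1280x960 original image.
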
    def evaluate_prediction(self, data_dict, statistics):
        if not is_main_process():
            return 0, 0, None

        logger.info("Evaluate in main process...")

        inference_time = statistics[0].item()
        nms_time = statistics[1].item()
        n_samples = statistics[2].item()

        a_infer_time = 1000 * inference_time / (n_samples * self.dataloader.batch_size)
        a_nms_time = 1000 * nms_time / (n_samples * self.dataloader.batch_size)

        time_info = ", ".join(
            [
                "Average {} time: {:.2f} ms".format(k, v)
                for k, v in zip(
                    ["forward", "NMS", "inference"],
                    [a_infer_time, a_nms_time, (a_infer_time + a_nms_time)],
                )
            ]
        )
        info = time_info + "\n"

        # all_boxes[class][image] is an (N, 5) array of [x1, y1, x2, y2, score]
        all_boxes = [
            [[] for _ in range(self.num_images)] for _ in range(self.num_classes)
        ]
        for img_num in range(self.num_images):
            bboxes, cls, scores = data_dict[img_num]
            if bboxes is None:
                for j in range(self.num_classes):
                    all_boxes[j][img_num] = np.empty([0, 5], dtype=np.float32)
                continue
            for j in range(self.num_classes):
                mask_c = cls == j
                if sum(mask_c) == 0:
                    all_boxes[j][img_num] = np.empty([0, 5], dtype=np.float32)
                    continue

                c_dets = torch.cat((bboxes, scores.unsqueeze(1)), dim=1)
                all_boxes[j][img_num] = c_dets[mask_c].numpy()

            sys.stdout.write(f"im_eval: {img_num + 1}/{self.num_images} \r")
            sys.stdout.flush()

        with tempfile.TemporaryDirectory() as tempdir:
            mAP50, mAP70 = self.dataloader.dataset.evaluate_detections(all_boxes, tempdir)
            return mAP50, mAP70, info
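

# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original module). In YOLOX the
# evaluator is typically constructed by an Exp/config object; the dataloader,
# model, and thresholds below are placeholder assumptions for your own setup.
#
#   evaluator = VOCEvaluator(
#       dataloader=val_loader,   # your Pascal VOC validation DataLoader
#       img_size=(640, 640),     # (height, width) used during preprocessing
#       confthre=0.01,
#       nmsthre=0.65,
#       num_classes=20,          # Pascal VOC has 20 object classes
#   )
#   mAP50, mAP70, summary = evaluator.evaluate(model, half=True)
#   logger.info(summary)
# ---------------------------------------------------------------------------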