Spaces:
Paused
Paused
// Copyright (c) Facebook, Inc. and its affiliates. | |
using namespace pybind11::literals; | |
namespace detectron2 { | |
namespace COCOeval { | |
// Sort detections from highest score to lowest, such that | |
// detection_instances[detection_sorted_indices[t]] >= | |
// detection_instances[detection_sorted_indices[t+1]]. Use stable_sort to match | |
// original COCO API | |
void SortInstancesByDetectionScore( | |
const std::vector<InstanceAnnotation>& detection_instances, | |
std::vector<uint64_t>* detection_sorted_indices) { | |
detection_sorted_indices->resize(detection_instances.size()); | |
std::iota( | |
detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); | |
std::stable_sort( | |
detection_sorted_indices->begin(), | |
detection_sorted_indices->end(), | |
[&detection_instances](size_t j1, size_t j2) { | |
return detection_instances[j1].score > detection_instances[j2].score; | |
}); | |
} | |
// Partition the ground truth objects based on whether or not to ignore them | |
// based on area | |
void SortInstancesByIgnore( | |
const std::array<double, 2>& area_range, | |
const std::vector<InstanceAnnotation>& ground_truth_instances, | |
std::vector<uint64_t>* ground_truth_sorted_indices, | |
std::vector<bool>* ignores) { | |
ignores->clear(); | |
ignores->reserve(ground_truth_instances.size()); | |
for (auto o : ground_truth_instances) { | |
ignores->push_back( | |
o.ignore || o.area < area_range[0] || o.area > area_range[1]); | |
} | |
ground_truth_sorted_indices->resize(ground_truth_instances.size()); | |
std::iota( | |
ground_truth_sorted_indices->begin(), | |
ground_truth_sorted_indices->end(), | |
0); | |
std::stable_sort( | |
ground_truth_sorted_indices->begin(), | |
ground_truth_sorted_indices->end(), | |
[&ignores](size_t j1, size_t j2) { | |
return (int)(*ignores)[j1] < (int)(*ignores)[j2]; | |
}); | |
} | |
// For each IOU threshold, greedily match each detected instance to a ground | |
// truth instance (if possible) and store the results | |
void MatchDetectionsToGroundTruth( | |
const std::vector<InstanceAnnotation>& detection_instances, | |
const std::vector<uint64_t>& detection_sorted_indices, | |
const std::vector<InstanceAnnotation>& ground_truth_instances, | |
const std::vector<uint64_t>& ground_truth_sorted_indices, | |
const std::vector<bool>& ignores, | |
const std::vector<std::vector<double>>& ious, | |
const std::vector<double>& iou_thresholds, | |
const std::array<double, 2>& area_range, | |
ImageEvaluation* results) { | |
// Initialize memory to store return data matches and ignore | |
const int num_iou_thresholds = iou_thresholds.size(); | |
const int num_ground_truth = ground_truth_sorted_indices.size(); | |
const int num_detections = detection_sorted_indices.size(); | |
std::vector<uint64_t> ground_truth_matches( | |
num_iou_thresholds * num_ground_truth, 0); | |
std::vector<uint64_t>& detection_matches = results->detection_matches; | |
std::vector<bool>& detection_ignores = results->detection_ignores; | |
std::vector<bool>& ground_truth_ignores = results->ground_truth_ignores; | |
detection_matches.resize(num_iou_thresholds * num_detections, 0); | |
detection_ignores.resize(num_iou_thresholds * num_detections, false); | |
ground_truth_ignores.resize(num_ground_truth); | |
for (auto g = 0; g < num_ground_truth; ++g) { | |
ground_truth_ignores[g] = ignores[ground_truth_sorted_indices[g]]; | |
} | |
for (auto t = 0; t < num_iou_thresholds; ++t) { | |
for (auto d = 0; d < num_detections; ++d) { | |
// information about best match so far (match=-1 -> unmatched) | |
double best_iou = std::min(iou_thresholds[t], 1 - 1e-10); | |
int match = -1; | |
for (auto g = 0; g < num_ground_truth; ++g) { | |
// if this ground truth instance is already matched and not a | |
// crowd, it cannot be matched to another detection | |
if (ground_truth_matches[t * num_ground_truth + g] > 0 && | |
!ground_truth_instances[ground_truth_sorted_indices[g]].is_crowd) { | |
continue; | |
} | |
// if detected instance matched to a regular ground truth | |
// instance, we can break on the first ground truth instance | |
// tagged as ignore (because they are sorted by the ignore tag) | |
if (match >= 0 && !ground_truth_ignores[match] && | |
ground_truth_ignores[g]) { | |
break; | |
} | |
// if IOU overlap is the best so far, store the match appropriately | |
if (ious[d][ground_truth_sorted_indices[g]] >= best_iou) { | |
best_iou = ious[d][ground_truth_sorted_indices[g]]; | |
match = g; | |
} | |
} | |
// if match was made, store id of match for both detection and | |
// ground truth | |
if (match >= 0) { | |
detection_ignores[t * num_detections + d] = ground_truth_ignores[match]; | |
detection_matches[t * num_detections + d] = | |
ground_truth_instances[ground_truth_sorted_indices[match]].id; | |
ground_truth_matches[t * num_ground_truth + match] = | |
detection_instances[detection_sorted_indices[d]].id; | |
} | |
// set unmatched detections outside of area range to ignore | |
const InstanceAnnotation& detection = | |
detection_instances[detection_sorted_indices[d]]; | |
detection_ignores[t * num_detections + d] = | |
detection_ignores[t * num_detections + d] || | |
(detection_matches[t * num_detections + d] == 0 && | |
(detection.area < area_range[0] || detection.area > area_range[1])); | |
} | |
} | |
// store detection score results | |
results->detection_scores.resize(detection_sorted_indices.size()); | |
for (size_t d = 0; d < detection_sorted_indices.size(); ++d) { | |
results->detection_scores[d] = | |
detection_instances[detection_sorted_indices[d]].score; | |
} | |
} | |
std::vector<ImageEvaluation> EvaluateImages( | |
const std::vector<std::array<double, 2>>& area_ranges, | |
int max_detections, | |
const std::vector<double>& iou_thresholds, | |
const ImageCategoryInstances<std::vector<double>>& image_category_ious, | |
const ImageCategoryInstances<InstanceAnnotation>& | |
image_category_ground_truth_instances, | |
const ImageCategoryInstances<InstanceAnnotation>& | |
image_category_detection_instances) { | |
const int num_area_ranges = area_ranges.size(); | |
const int num_images = image_category_ground_truth_instances.size(); | |
const int num_categories = | |
image_category_ious.size() > 0 ? image_category_ious[0].size() : 0; | |
std::vector<uint64_t> detection_sorted_indices; | |
std::vector<uint64_t> ground_truth_sorted_indices; | |
std::vector<bool> ignores; | |
std::vector<ImageEvaluation> results_all( | |
num_images * num_area_ranges * num_categories); | |
// Store results for each image, category, and area range combination. Results | |
// for each IOU threshold are packed into the same ImageEvaluation object | |
for (auto i = 0; i < num_images; ++i) { | |
for (auto c = 0; c < num_categories; ++c) { | |
const std::vector<InstanceAnnotation>& ground_truth_instances = | |
image_category_ground_truth_instances[i][c]; | |
const std::vector<InstanceAnnotation>& detection_instances = | |
image_category_detection_instances[i][c]; | |
SortInstancesByDetectionScore( | |
detection_instances, &detection_sorted_indices); | |
if ((int)detection_sorted_indices.size() > max_detections) { | |
detection_sorted_indices.resize(max_detections); | |
} | |
for (size_t a = 0; a < area_ranges.size(); ++a) { | |
SortInstancesByIgnore( | |
area_ranges[a], | |
ground_truth_instances, | |
&ground_truth_sorted_indices, | |
&ignores); | |
MatchDetectionsToGroundTruth( | |
detection_instances, | |
detection_sorted_indices, | |
ground_truth_instances, | |
ground_truth_sorted_indices, | |
ignores, | |
image_category_ious[i][c], | |
iou_thresholds, | |
area_ranges[a], | |
&results_all | |
[c * num_area_ranges * num_images + a * num_images + i]); | |
} | |
} | |
} | |
return results_all; | |
} | |
// Convert a python list to a vector | |
template <typename T> | |
std::vector<T> list_to_vec(const py::list& l) { | |
std::vector<T> v(py::len(l)); | |
for (int i = 0; i < (int)py::len(l); ++i) { | |
v[i] = l[i].cast<T>(); | |
} | |
return v; | |
} | |
// Helper function to Accumulate() | |
// Considers the evaluation results applicable to a particular category, area | |
// range, and max_detections parameter setting, which begin at | |
// evaluations[evaluation_index]. Extracts a sorted list of length n of all | |
// applicable detection instances concatenated across all images in the dataset, | |
// which are represented by the outputs evaluation_indices, detection_scores, | |
// image_detection_indices, and detection_sorted_indices--all of which are | |
// length n. evaluation_indices[i] stores the applicable index into | |
// evaluations[] for instance i, which has detection score detection_score[i], | |
// and is the image_detection_indices[i]'th of the list of detections | |
// for the image containing i. detection_sorted_indices[] defines a sorted | |
// permutation of the 3 other outputs | |
int BuildSortedDetectionList( | |
const std::vector<ImageEvaluation>& evaluations, | |
const int64_t evaluation_index, | |
const int64_t num_images, | |
const int max_detections, | |
std::vector<uint64_t>* evaluation_indices, | |
std::vector<double>* detection_scores, | |
std::vector<uint64_t>* detection_sorted_indices, | |
std::vector<uint64_t>* image_detection_indices) { | |
assert(evaluations.size() >= evaluation_index + num_images); | |
// Extract a list of object instances of the applicable category, area | |
// range, and max detections requirements such that they can be sorted | |
image_detection_indices->clear(); | |
evaluation_indices->clear(); | |
detection_scores->clear(); | |
image_detection_indices->reserve(num_images * max_detections); | |
evaluation_indices->reserve(num_images * max_detections); | |
detection_scores->reserve(num_images * max_detections); | |
int num_valid_ground_truth = 0; | |
for (auto i = 0; i < num_images; ++i) { | |
const ImageEvaluation& evaluation = evaluations[evaluation_index + i]; | |
for (int d = 0; | |
d < (int)evaluation.detection_scores.size() && d < max_detections; | |
++d) { // detected instances | |
evaluation_indices->push_back(evaluation_index + i); | |
image_detection_indices->push_back(d); | |
detection_scores->push_back(evaluation.detection_scores[d]); | |
} | |
for (auto ground_truth_ignore : evaluation.ground_truth_ignores) { | |
if (!ground_truth_ignore) { | |
++num_valid_ground_truth; | |
} | |
} | |
} | |
// Sort detections by decreasing score, using stable sort to match | |
// python implementation | |
detection_sorted_indices->resize(detection_scores->size()); | |
std::iota( | |
detection_sorted_indices->begin(), detection_sorted_indices->end(), 0); | |
std::stable_sort( | |
detection_sorted_indices->begin(), | |
detection_sorted_indices->end(), | |
[&detection_scores](size_t j1, size_t j2) { | |
return (*detection_scores)[j1] > (*detection_scores)[j2]; | |
}); | |
return num_valid_ground_truth; | |
} | |
// Helper function to Accumulate() | |
// Compute a precision recall curve given a sorted list of detected instances | |
// encoded in evaluations, evaluation_indices, detection_scores, | |
// detection_sorted_indices, image_detection_indices (see | |
// BuildSortedDetectionList()). Using vectors precisions and recalls | |
// and temporary storage, output the results into precisions_out, recalls_out, | |
// and scores_out, which are large buffers containing many precion/recall curves | |
// for all possible parameter settings, with precisions_out_index and | |
// recalls_out_index defining the applicable indices to store results. | |
void ComputePrecisionRecallCurve( | |
const int64_t precisions_out_index, | |
const int64_t precisions_out_stride, | |
const int64_t recalls_out_index, | |
const std::vector<double>& recall_thresholds, | |
const int iou_threshold_index, | |
const int num_iou_thresholds, | |
const int num_valid_ground_truth, | |
const std::vector<ImageEvaluation>& evaluations, | |
const std::vector<uint64_t>& evaluation_indices, | |
const std::vector<double>& detection_scores, | |
const std::vector<uint64_t>& detection_sorted_indices, | |
const std::vector<uint64_t>& image_detection_indices, | |
std::vector<double>* precisions, | |
std::vector<double>* recalls, | |
std::vector<double>* precisions_out, | |
std::vector<double>* scores_out, | |
std::vector<double>* recalls_out) { | |
assert(recalls_out->size() > recalls_out_index); | |
// Compute precision/recall for each instance in the sorted list of detections | |
int64_t true_positives_sum = 0, false_positives_sum = 0; | |
precisions->clear(); | |
recalls->clear(); | |
precisions->reserve(detection_sorted_indices.size()); | |
recalls->reserve(detection_sorted_indices.size()); | |
assert(!evaluations.empty() || detection_sorted_indices.empty()); | |
for (auto detection_sorted_index : detection_sorted_indices) { | |
const ImageEvaluation& evaluation = | |
evaluations[evaluation_indices[detection_sorted_index]]; | |
const auto num_detections = | |
evaluation.detection_matches.size() / num_iou_thresholds; | |
const auto detection_index = iou_threshold_index * num_detections + | |
image_detection_indices[detection_sorted_index]; | |
assert(evaluation.detection_matches.size() > detection_index); | |
assert(evaluation.detection_ignores.size() > detection_index); | |
const int64_t detection_match = | |
evaluation.detection_matches[detection_index]; | |
const bool detection_ignores = | |
evaluation.detection_ignores[detection_index]; | |
const auto true_positive = detection_match > 0 && !detection_ignores; | |
const auto false_positive = detection_match == 0 && !detection_ignores; | |
if (true_positive) { | |
++true_positives_sum; | |
} | |
if (false_positive) { | |
++false_positives_sum; | |
} | |
const double recall = | |
static_cast<double>(true_positives_sum) / num_valid_ground_truth; | |
recalls->push_back(recall); | |
const int64_t num_valid_detections = | |
true_positives_sum + false_positives_sum; | |
const double precision = num_valid_detections > 0 | |
? static_cast<double>(true_positives_sum) / num_valid_detections | |
: 0.0; | |
precisions->push_back(precision); | |
} | |
(*recalls_out)[recalls_out_index] = !recalls->empty() ? recalls->back() : 0; | |
for (int64_t i = static_cast<int64_t>(precisions->size()) - 1; i > 0; --i) { | |
if ((*precisions)[i] > (*precisions)[i - 1]) { | |
(*precisions)[i - 1] = (*precisions)[i]; | |
} | |
} | |
// Sample the per instance precision/recall list at each recall threshold | |
for (size_t r = 0; r < recall_thresholds.size(); ++r) { | |
// first index in recalls >= recall_thresholds[r] | |
std::vector<double>::iterator low = std::lower_bound( | |
recalls->begin(), recalls->end(), recall_thresholds[r]); | |
size_t precisions_index = low - recalls->begin(); | |
const auto results_ind = precisions_out_index + r * precisions_out_stride; | |
assert(results_ind < precisions_out->size()); | |
assert(results_ind < scores_out->size()); | |
if (precisions_index < precisions->size()) { | |
(*precisions_out)[results_ind] = (*precisions)[precisions_index]; | |
(*scores_out)[results_ind] = | |
detection_scores[detection_sorted_indices[precisions_index]]; | |
} else { | |
(*precisions_out)[results_ind] = 0; | |
(*scores_out)[results_ind] = 0; | |
} | |
} | |
} | |
py::dict Accumulate( | |
const py::object& params, | |
const std::vector<ImageEvaluation>& evaluations) { | |
const std::vector<double> recall_thresholds = | |
list_to_vec<double>(params.attr("recThrs")); | |
const std::vector<int> max_detections = | |
list_to_vec<int>(params.attr("maxDets")); | |
const int num_iou_thresholds = py::len(params.attr("iouThrs")); | |
const int num_recall_thresholds = py::len(params.attr("recThrs")); | |
const int num_categories = params.attr("useCats").cast<int>() == 1 | |
? py::len(params.attr("catIds")) | |
: 1; | |
const int num_area_ranges = py::len(params.attr("areaRng")); | |
const int num_max_detections = py::len(params.attr("maxDets")); | |
const int num_images = py::len(params.attr("imgIds")); | |
std::vector<double> precisions_out( | |
num_iou_thresholds * num_recall_thresholds * num_categories * | |
num_area_ranges * num_max_detections, | |
-1); | |
std::vector<double> recalls_out( | |
num_iou_thresholds * num_categories * num_area_ranges * | |
num_max_detections, | |
-1); | |
std::vector<double> scores_out( | |
num_iou_thresholds * num_recall_thresholds * num_categories * | |
num_area_ranges * num_max_detections, | |
-1); | |
// Consider the list of all detected instances in the entire dataset in one | |
// large list. evaluation_indices, detection_scores, | |
// image_detection_indices, and detection_sorted_indices all have the same | |
// length as this list, such that each entry corresponds to one detected | |
// instance | |
std::vector<uint64_t> evaluation_indices; // indices into evaluations[] | |
std::vector<double> detection_scores; // detection scores of each instance | |
std::vector<uint64_t> detection_sorted_indices; // sorted indices of all | |
// instances in the dataset | |
std::vector<uint64_t> | |
image_detection_indices; // indices into the list of detected instances in | |
// the same image as each instance | |
std::vector<double> precisions, recalls; | |
for (auto c = 0; c < num_categories; ++c) { | |
for (auto a = 0; a < num_area_ranges; ++a) { | |
for (auto m = 0; m < num_max_detections; ++m) { | |
// The COCO PythonAPI assumes evaluations[] (the return value of | |
// COCOeval::EvaluateImages() is one long list storing results for each | |
// combination of category, area range, and image id, with categories in | |
// the outermost loop and images in the innermost loop. | |
const int64_t evaluations_index = | |
c * num_area_ranges * num_images + a * num_images; | |
int num_valid_ground_truth = BuildSortedDetectionList( | |
evaluations, | |
evaluations_index, | |
num_images, | |
max_detections[m], | |
&evaluation_indices, | |
&detection_scores, | |
&detection_sorted_indices, | |
&image_detection_indices); | |
if (num_valid_ground_truth == 0) { | |
continue; | |
} | |
for (auto t = 0; t < num_iou_thresholds; ++t) { | |
// recalls_out is a flattened vectors representing a | |
// num_iou_thresholds X num_categories X num_area_ranges X | |
// num_max_detections matrix | |
const int64_t recalls_out_index = | |
t * num_categories * num_area_ranges * num_max_detections + | |
c * num_area_ranges * num_max_detections + | |
a * num_max_detections + m; | |
// precisions_out and scores_out are flattened vectors | |
// representing a num_iou_thresholds X num_recall_thresholds X | |
// num_categories X num_area_ranges X num_max_detections matrix | |
const int64_t precisions_out_stride = | |
num_categories * num_area_ranges * num_max_detections; | |
const int64_t precisions_out_index = t * num_recall_thresholds * | |
num_categories * num_area_ranges * num_max_detections + | |
c * num_area_ranges * num_max_detections + | |
a * num_max_detections + m; | |
ComputePrecisionRecallCurve( | |
precisions_out_index, | |
precisions_out_stride, | |
recalls_out_index, | |
recall_thresholds, | |
t, | |
num_iou_thresholds, | |
num_valid_ground_truth, | |
evaluations, | |
evaluation_indices, | |
detection_scores, | |
detection_sorted_indices, | |
image_detection_indices, | |
&precisions, | |
&recalls, | |
&precisions_out, | |
&scores_out, | |
&recalls_out); | |
} | |
} | |
} | |
} | |
time_t rawtime; | |
struct tm local_time; | |
std::array<char, 200> buffer; | |
time(&rawtime); | |
localtime_s(&local_time, &rawtime); | |
localtime_r(&rawtime, &local_time); | |
strftime( | |
buffer.data(), 200, "%Y-%m-%d %H:%num_max_detections:%S", &local_time); | |
return py::dict( | |
"params"_a = params, | |
"counts"_a = std::vector<int64_t>( | |
{num_iou_thresholds, | |
num_recall_thresholds, | |
num_categories, | |
num_area_ranges, | |
num_max_detections}), | |
"date"_a = buffer, | |
"precision"_a = precisions_out, | |
"recall"_a = recalls_out, | |
"scores"_a = scores_out); | |
} | |
} // namespace COCOeval | |
} // namespace detectron2 | |