"""Common utility functions for evaluation.""" |
|
from __future__ import absolute_import |
|
from __future__ import division |
|
from __future__ import print_function |
|
|
|
import collections |
|
import os |
|
import re |
|
import time |
|
|
|
import numpy as np |
|
from six.moves import range |
|
import tensorflow.compat.v1 as tf |
|
|
|
import tf_slim as slim |
|
|
|
from object_detection.core import box_list |
|
from object_detection.core import box_list_ops |
|
from object_detection.core import keypoint_ops |
|
from object_detection.core import standard_fields as fields |
|
from object_detection.metrics import coco_evaluation |
|
from object_detection.protos import eval_pb2 |
|
from object_detection.utils import label_map_util |
|
from object_detection.utils import object_detection_evaluation |
|
from object_detection.utils import ops |
|
from object_detection.utils import shape_utils |
|
from object_detection.utils import visualization_utils as vis_utils |
|
|
|
EVAL_KEYPOINT_METRIC = 'coco_keypoint_metrics' |
|
|
|
|
|
|
|
|
|
# Maps metric names in eval_pb2.EvalConfig.metrics_set to the evaluator
# classes that implement them.
EVAL_METRICS_CLASS_DICT = {
    'coco_detection_metrics':
        coco_evaluation.CocoDetectionEvaluator,
    'coco_keypoint_metrics':
        coco_evaluation.CocoKeypointEvaluator,
    'coco_mask_metrics':
        coco_evaluation.CocoMaskEvaluator,
    'coco_panoptic_metrics':
        coco_evaluation.CocoPanopticSegmentationEvaluator,
    'oid_challenge_detection_metrics':
        object_detection_evaluation.OpenImagesDetectionChallengeEvaluator,
    'oid_challenge_segmentation_metrics':
        object_detection_evaluation
        .OpenImagesInstanceSegmentationChallengeEvaluator,
    'pascal_voc_detection_metrics':
        object_detection_evaluation.PascalDetectionEvaluator,
    'weighted_pascal_voc_detection_metrics':
        object_detection_evaluation.WeightedPascalDetectionEvaluator,
    'precision_at_recall_detection_metrics':
        object_detection_evaluation.PrecisionAtRecallDetectionEvaluator,
    'pascal_voc_instance_segmentation_metrics':
        object_detection_evaluation.PascalInstanceSegmentationEvaluator,
    'weighted_pascal_voc_instance_segmentation_metrics':
        object_detection_evaluation.WeightedPascalInstanceSegmentationEvaluator,
    'oid_V2_detection_metrics':
        object_detection_evaluation.OpenImagesDetectionEvaluator,
}

EVAL_DEFAULT_METRIC = 'coco_detection_metrics'


def write_metrics(metrics, global_step, summary_dir):
  """Write metrics to a summary directory.

  Args:
    metrics: A dictionary containing metric names and values.
    global_step: Global step at which the metrics are computed.
    summary_dir: Directory to write tensorflow summaries to.
  """
  tf.logging.info('Writing metrics to tf summary.')
  summary_writer = tf.summary.FileWriterCache.get(summary_dir)
  for key in sorted(metrics):
    summary = tf.Summary(value=[
        tf.Summary.Value(tag=key, simple_value=metrics[key]),
    ])
    summary_writer.add_summary(summary, global_step)
    tf.logging.info('%s: %f', key, metrics[key])
  tf.logging.info('Metrics written to tf summary.')


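# Example usage (a minimal sketch; the metric name, step and directory below
# are hypothetical placeholders):
#
#   metrics = {'DetectionBoxes_Precision/mAP': 0.42}
#   write_metrics(metrics, global_step=10000, summary_dir='/tmp/eval')

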
def visualize_detection_results(result_dict,
                                tag,
                                global_step,
                                categories,
                                summary_dir='',
                                export_dir='',
                                agnostic_mode=False,
                                show_groundtruth=False,
                                groundtruth_box_visualization_color='black',
                                min_score_thresh=.5,
                                max_num_predictions=20,
                                skip_scores=False,
                                skip_labels=False,
                                keep_image_id_for_visualization_export=False):
  """Visualizes detection results and writes visualizations to image summaries.

  This function visualizes an image with its detected bounding boxes and writes
  to image summaries which can be viewed on tensorboard. It optionally also
  writes images to a directory. If an entry is missing from the label map, the
  class name shown in the visualization is "N/A".

  Args:
    result_dict: a dictionary holding groundtruth and detection
      data corresponding to each image being evaluated. The following keys
      are required:
        'original_image': a numpy array representing the image with shape
          [1, height, width, 3] or [1, height, width, 1]
        'detection_boxes': a numpy array of shape [N, 4]
        'detection_scores': a numpy array of shape [N]
        'detection_classes': a numpy array of shape [N]
      The following keys are optional:
        'groundtruth_boxes': a numpy array of shape [N, 4]
        'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
      Detections are assumed to be provided in decreasing order of score, and
      scores are assumed to be probabilities between 0 and 1.
    tag: tensorboard tag (string) to associate with image.
    global_step: global step at which the visualizations are generated.
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list has the following keys:
          'id': (required) an integer id uniquely identifying this category
          'name': (required) string representing category name
            e.g., 'cat', 'dog', 'pizza'
          'supercategory': (optional) string representing the supercategory
            e.g., 'animal', 'vehicle', 'food', etc
    summary_dir: the output directory to which the image summaries are written.
    export_dir: the output directory to which images are written. If this is
      empty (default), then images are not exported.
    agnostic_mode: boolean (default: False) controlling whether to evaluate in
      class-agnostic mode or not.
    show_groundtruth: boolean (default: False) controlling whether to show
      groundtruth boxes in addition to detected boxes.
    groundtruth_box_visualization_color: box color for visualizing groundtruth
      boxes.
    min_score_thresh: minimum score threshold for a box to be visualized.
    max_num_predictions: maximum number of detections to visualize.
    skip_scores: whether to skip score when drawing a single detection.
    skip_labels: whether to skip label when drawing a single detection.
    keep_image_id_for_visualization_export: whether to keep image identifier in
      filename when exported to export_dir.

  Raises:
    ValueError: if result_dict does not contain the expected keys (i.e.,
      'original_image', 'detection_boxes', 'detection_scores',
      'detection_classes')
  """
  detection_fields = fields.DetectionResultFields
  input_fields = fields.InputDataFields
  if not set([
      input_fields.original_image,
      detection_fields.detection_boxes,
      detection_fields.detection_scores,
      detection_fields.detection_classes,
  ]).issubset(set(result_dict.keys())):
    raise ValueError('result_dict does not contain all expected keys.')
  if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
    raise ValueError('If show_groundtruth is enabled, result_dict must contain '
                     'groundtruth_boxes.')
  tf.logging.info('Creating detection visualizations.')
  category_index = label_map_util.create_category_index(categories)

  image = np.squeeze(result_dict[input_fields.original_image], axis=0)
  if image.shape[2] == 1:
    # Tile a single-channel image to three channels for visualization.
    image = np.tile(image, [1, 1, 3])
  detection_boxes = result_dict[detection_fields.detection_boxes]
  detection_scores = result_dict[detection_fields.detection_scores]
  detection_classes = np.int32(result_dict[detection_fields.detection_classes])
  detection_keypoints = result_dict.get(detection_fields.detection_keypoints)
  detection_masks = result_dict.get(detection_fields.detection_masks)
  detection_boundaries = result_dict.get(detection_fields.detection_boundaries)

  if show_groundtruth:
    groundtruth_boxes = result_dict[input_fields.groundtruth_boxes]
    groundtruth_keypoints = result_dict.get(input_fields.groundtruth_keypoints)
    vis_utils.visualize_boxes_and_labels_on_image_array(
        image=image,
        boxes=groundtruth_boxes,
        classes=None,
        scores=None,
        category_index=category_index,
        keypoints=groundtruth_keypoints,
        use_normalized_coordinates=False,
        max_boxes_to_draw=None,
        groundtruth_box_visualization_color=groundtruth_box_visualization_color)
  vis_utils.visualize_boxes_and_labels_on_image_array(
      image,
      detection_boxes,
      detection_classes,
      detection_scores,
      category_index,
      instance_masks=detection_masks,
      instance_boundaries=detection_boundaries,
      keypoints=detection_keypoints,
      use_normalized_coordinates=False,
      max_boxes_to_draw=max_num_predictions,
      min_score_thresh=min_score_thresh,
      agnostic_mode=agnostic_mode,
      skip_scores=skip_scores,
      skip_labels=skip_labels)

  if export_dir:
    if (keep_image_id_for_visualization_export and
        result_dict[input_fields.key]):
      export_path = os.path.join(
          export_dir,
          'export-{}-{}.png'.format(tag, result_dict[input_fields.key]))
    else:
      export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
    vis_utils.save_image_array_as_png(image, export_path)

  summary = tf.Summary(value=[
      tf.Summary.Value(
          tag=tag,
          image=tf.Summary.Image(
              encoded_image_string=vis_utils.encode_image_array_as_png_str(
                  image)))
  ])
  summary_writer = tf.summary.FileWriterCache.get(summary_dir)
  summary_writer.add_summary(summary, global_step)

  tf.logging.info('Detection visualizations written to summary with tag %s.',
                  tag)


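# Example usage (a minimal sketch; `result_dict` is assumed to follow the
# contract documented above, and the label map path is a hypothetical
# placeholder):
#
#   categories = label_map_util.create_categories_from_labelmap(
#       '/path/to/label_map.pbtxt')
#   visualize_detection_results(
#       result_dict, tag='image-0', global_step=10000, categories=categories,
#       summary_dir='/tmp/eval', min_score_thresh=0.3)

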
def _run_checkpoint_once(tensor_dict,
                         evaluators=None,
                         batch_processor=None,
                         checkpoint_dirs=None,
                         variables_to_restore=None,
                         restore_fn=None,
                         num_batches=1,
                         master='',
                         save_graph=False,
                         save_graph_dir='',
                         losses_dict=None,
                         eval_export_path=None,
                         process_metrics_fn=None):
  """Evaluates metrics defined in evaluators and returns summaries.

  This function loads the latest checkpoint in checkpoint_dirs and evaluates
  all metrics defined in evaluators. The metrics are processed in batch by the
  batch_processor.

  Args:
    tensor_dict: a dictionary holding tensors representing a batch of detections
      and corresponding groundtruth annotations.
    evaluators: a list of objects of type DetectionEvaluator to be used for
      evaluation. Note that the metric names produced by different evaluators
      must be unique.
    batch_processor: a function taking four arguments:
      1. tensor_dict: the same tensor_dict that is passed in as the first
        argument to this function.
      2. sess: a tensorflow session
      3. batch_index: an integer representing the index of the batch amongst
        all batches
      4. counters: a dictionary with 'success' and 'skipped' counts, updated as
        images are processed
      and an optional losses_dict keyword argument. By default, batch_processor
      is None, which defaults to running:
        return sess.run(tensor_dict)
      To skip an image, it suffices to return an empty dictionary in place of
      result_dict.
    checkpoint_dirs: list of directories to load into an EnsembleModel. If it
      has only one directory, EnsembleModel will not be used -- a DetectionModel
      will be instantiated directly. Not used if restore_fn is set.
    variables_to_restore: None, or a dictionary mapping variable names found in
      a checkpoint to model variables. The dictionary would normally be
      generated by creating a tf.train.ExponentialMovingAverage object and
      calling its variables_to_restore() method. Not used if restore_fn is set.
    restore_fn: None, or a function that takes a tf.Session object and correctly
      restores all necessary variables from the correct checkpoint file. If
      None, attempts to restore from the first directory in checkpoint_dirs.
    num_batches: the number of batches to use for evaluation.
    master: the location of the Tensorflow session.
    save_graph: whether or not the Tensorflow graph is stored as a pbtxt file.
    save_graph_dir: where to store the Tensorflow graph on disk. If save_graph
      is True this must be non-empty.
    losses_dict: optional dictionary of scalar detection losses.
    eval_export_path: Path for saving a json file that contains the detection
      results in json format.
    process_metrics_fn: a callback called with evaluation results after each
      evaluation is done. It could be used e.g. to back up checkpoints with the
      best evaluation scores, or to call an external system to update evaluation
      results in order to drive hyper-parameter search. Parameters are:
      int checkpoint_number, Dict[str, ObjectDetectionEvalMetrics] metrics,
      str checkpoint_file path.

  Returns:
    global_step: the count of global steps.
    all_evaluator_metrics: A dictionary containing metric names and values.

  Raises:
    ValueError: if restore_fn is None and checkpoint_dirs doesn't have at least
      one element.
    ValueError: if save_graph is True and save_graph_dir is not defined.
  """
  if save_graph and not save_graph_dir:
    raise ValueError('`save_graph_dir` must be defined.')
  sess = tf.Session(master, graph=tf.get_default_graph())
  sess.run(tf.global_variables_initializer())
  sess.run(tf.local_variables_initializer())
  sess.run(tf.tables_initializer())
  checkpoint_file = None
  if restore_fn:
    restore_fn(sess)
  else:
    if not checkpoint_dirs:
      raise ValueError('`checkpoint_dirs` must have at least one entry.')
    checkpoint_file = tf.train.latest_checkpoint(checkpoint_dirs[0])
    saver = tf.train.Saver(variables_to_restore)
    saver.restore(sess, checkpoint_file)

  if save_graph:
    tf.train.write_graph(sess.graph_def, save_graph_dir, 'eval.pbtxt')

  counters = {'skipped': 0, 'success': 0}
  aggregate_result_losses_dict = collections.defaultdict(list)
  with slim.queues.QueueRunners(sess):
    try:
      for batch in range(int(num_batches)):
        if (batch + 1) % 100 == 0:
          tf.logging.info('Running eval ops batch %d/%d', batch + 1,
                          num_batches)
        if not batch_processor:
          try:
            if not losses_dict:
              losses_dict = {}
            result_dict, result_losses_dict = sess.run([tensor_dict,
                                                        losses_dict])
            counters['success'] += 1
          except tf.errors.InvalidArgumentError:
            tf.logging.info('Skipping image')
            counters['skipped'] += 1
            result_dict = {}
        else:
          result_dict, result_losses_dict = batch_processor(
              tensor_dict, sess, batch, counters, losses_dict=losses_dict)
        if not result_dict:
          continue
        for key, value in iter(result_losses_dict.items()):
          aggregate_result_losses_dict[key].append(value)
        for evaluator in evaluators:
          # Use the image id from result_dict if it is available; otherwise
          # fall back to the batch index.
          if (isinstance(result_dict, dict) and
              fields.InputDataFields.key in result_dict and
              result_dict[fields.InputDataFields.key]):
            image_id = result_dict[fields.InputDataFields.key]
          else:
            image_id = batch
          evaluator.add_single_ground_truth_image_info(
              image_id=image_id, groundtruth_dict=result_dict)
          evaluator.add_single_detected_image_info(
              image_id=image_id, detections_dict=result_dict)
      tf.logging.info('Running eval batches done.')
    except tf.errors.OutOfRangeError:
      tf.logging.info('Done evaluating -- epoch limit reached')
    finally:
      # Compute the evaluator metrics even if an exception interrupted the
      # evaluation loop.
      tf.logging.info('# success: %d', counters['success'])
      tf.logging.info('# skipped: %d', counters['skipped'])
      all_evaluator_metrics = {}
      if eval_export_path:
        for evaluator in evaluators:
          if (isinstance(evaluator, coco_evaluation.CocoDetectionEvaluator) or
              isinstance(evaluator, coco_evaluation.CocoMaskEvaluator)):
            tf.logging.info('Started dumping to json file.')
            evaluator.dump_detections_to_json_file(
                json_output_path=eval_export_path)
            tf.logging.info('Finished dumping to json file.')
      for evaluator in evaluators:
        metrics = evaluator.evaluate()
        evaluator.clear()
        if any(key in all_evaluator_metrics for key in metrics):
          raise ValueError('Metric names between evaluators must not collide.')
        all_evaluator_metrics.update(metrics)
      global_step = tf.train.global_step(sess, tf.train.get_global_step())

      for key, value in iter(aggregate_result_losses_dict.items()):
        all_evaluator_metrics['Losses/' + key] = np.mean(value)
  if process_metrics_fn and checkpoint_file:
    m = re.search(r'model.ckpt-(\d+)$', checkpoint_file)
    if not m:
      tf.logging.error('Failed to parse checkpoint number from: %s',
                       checkpoint_file)
    else:
      checkpoint_number = int(m.group(1))
      process_metrics_fn(checkpoint_number, all_evaluator_metrics,
                         checkpoint_file)
  sess.close()
  return (global_step, all_evaluator_metrics)


def repeated_checkpoint_run(tensor_dict,
                            summary_dir,
                            evaluators,
                            batch_processor=None,
                            checkpoint_dirs=None,
                            variables_to_restore=None,
                            restore_fn=None,
                            num_batches=1,
                            eval_interval_secs=120,
                            max_number_of_evaluations=None,
                            max_evaluation_global_step=None,
                            master='',
                            save_graph=False,
                            save_graph_dir='',
                            losses_dict=None,
                            eval_export_path=None,
                            process_metrics_fn=None):
  """Periodically evaluates desired tensors using checkpoint_dirs or restore_fn.

  This function repeatedly loads the latest checkpoint, evaluates a desired
  set of tensors (provided by tensor_dict) with the given evaluators, and
  writes the resulting metrics as summaries to summary_dir.

  Args:
    tensor_dict: a dictionary holding tensors representing a batch of detections
      and corresponding groundtruth annotations.
    summary_dir: a directory to write metrics summaries.
    evaluators: a list of objects of type DetectionEvaluator to be used for
      evaluation. Note that the metric names produced by different evaluators
      must be unique.
    batch_processor: a function taking three arguments:
      1. tensor_dict: the same tensor_dict that is passed in as the first
        argument to this function.
      2. sess: a tensorflow session
      3. batch_index: an integer representing the index of the batch amongst
        all batches
      By default, batch_processor is None, which defaults to running:
        return sess.run(tensor_dict)
    checkpoint_dirs: list of directories to load into a DetectionModel or an
      EnsembleModel if restore_fn isn't set. Also used to determine when to run
      the next evaluation. Must have at least one element.
    variables_to_restore: None, or a dictionary mapping variable names found in
      a checkpoint to model variables. The dictionary would normally be
      generated by creating a tf.train.ExponentialMovingAverage object and
      calling its variables_to_restore() method. Not used if restore_fn is set.
    restore_fn: a function that takes a tf.Session object and correctly restores
      all necessary variables from the correct checkpoint file.
    num_batches: the number of batches to use for evaluation.
    eval_interval_secs: the number of seconds between each evaluation run.
    max_number_of_evaluations: the max number of iterations of the evaluation.
      If the value is left as None the evaluation continues indefinitely.
    max_evaluation_global_step: global step when evaluation stops.
    master: the location of the Tensorflow session.
    save_graph: whether or not the Tensorflow graph is saved as a pbtxt file.
    save_graph_dir: where to save the Tensorflow graph on disk. If save_graph
      is True this must be non-empty.
    losses_dict: optional dictionary of scalar detection losses.
    eval_export_path: Path for saving a json file that contains the detection
      results in json format.
    process_metrics_fn: a callback called with evaluation results after each
      evaluation is done. It could be used e.g. to back up checkpoints with the
      best evaluation scores, or to call an external system to update evaluation
      results in order to drive hyper-parameter search. Parameters are:
      int checkpoint_number, Dict[str, ObjectDetectionEvalMetrics] metrics,
      str checkpoint_file path.

  Returns:
    metrics: A dictionary containing metric names and values in the latest
      evaluation.

  Raises:
    ValueError: if max_number_of_evaluations is neither None nor a positive
      number.
    ValueError: if max_evaluation_global_step is neither None nor a positive
      number.
    ValueError: if checkpoint_dirs doesn't have at least one element.
  """
  if max_number_of_evaluations and max_number_of_evaluations <= 0:
    raise ValueError(
        '`max_number_of_evaluations` must be either None or a positive number.')
  if max_evaluation_global_step and max_evaluation_global_step <= 0:
    raise ValueError(
        '`max_evaluation_global_step` must be either None or positive.')

  if not checkpoint_dirs:
    raise ValueError('`checkpoint_dirs` must have at least one entry.')

  last_evaluated_model_path = None
  number_of_evaluations = 0
  metrics = None  # Returned as-is if no checkpoint is ever evaluated.
  while True:
    start = time.time()
    tf.logging.info('Starting evaluation at ' + time.strftime(
        '%Y-%m-%d-%H:%M:%S', time.gmtime()))
    model_path = tf.train.latest_checkpoint(checkpoint_dirs[0])
    if not model_path:
      tf.logging.info('No model found in %s. Will try again in %d seconds',
                      checkpoint_dirs[0], eval_interval_secs)
    elif model_path == last_evaluated_model_path:
      tf.logging.info('Found already evaluated checkpoint. Will try again in '
                      '%d seconds', eval_interval_secs)
    else:
      last_evaluated_model_path = model_path
      global_step, metrics = _run_checkpoint_once(
          tensor_dict,
          evaluators,
          batch_processor,
          checkpoint_dirs,
          variables_to_restore,
          restore_fn,
          num_batches,
          master,
          save_graph,
          save_graph_dir,
          losses_dict=losses_dict,
          eval_export_path=eval_export_path,
          process_metrics_fn=process_metrics_fn)
      write_metrics(metrics, global_step, summary_dir)
      if (max_evaluation_global_step and
          global_step >= max_evaluation_global_step):
        tf.logging.info('Finished evaluation!')
        break
    number_of_evaluations += 1

    if (max_number_of_evaluations and
        number_of_evaluations >= max_number_of_evaluations):
      tf.logging.info('Finished evaluation!')
      break
    time_to_next_eval = start + eval_interval_secs - time.time()
    if time_to_next_eval > 0:
      time.sleep(time_to_next_eval)

  return metrics


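# Example usage (a minimal sketch; the tensor dict, eval config, categories and
# directory paths are hypothetical placeholders):
#
#   evaluators = get_evaluators(eval_config, categories)
#   metrics = repeated_checkpoint_run(
#       eval_tensor_dict,
#       summary_dir='/tmp/eval',
#       evaluators=evaluators,
#       checkpoint_dirs=['/tmp/train'],
#       num_batches=500,
#       max_number_of_evaluations=1)

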
def _scale_box_to_absolute(args):
  boxes, image_shape = args
  return box_list_ops.to_absolute_coordinates(
      box_list.BoxList(boxes), image_shape[0], image_shape[1]).get()


def _resize_detection_masks(args):
  detection_boxes, detection_masks, image_shape = args
  detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
      detection_masks, detection_boxes, image_shape[0], image_shape[1])
  return tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8)


def _resize_groundtruth_masks(args):
  """Resizes groundtruth masks to the original image size."""
  mask, true_image_shape, original_image_shape = args
  true_height = true_image_shape[0]
  true_width = true_image_shape[1]
  mask = mask[:, :true_height, :true_width]
  mask = tf.expand_dims(mask, 3)
  mask = tf.image.resize_images(
      mask,
      original_image_shape,
      method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
      align_corners=True)
  return tf.cast(tf.squeeze(mask, 3), tf.uint8)


def _scale_keypoint_to_absolute(args):
  keypoints, image_shape = args
  return keypoint_ops.scale(keypoints, image_shape[0], image_shape[1])


def result_dict_for_single_example(image,
                                   key,
                                   detections,
                                   groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
  """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  Args:
    image: A single 4D uint8 image tensor of shape [1, H, W, C].
    key: A single string tensor identifying the image.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess().
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D int64 tensor of instance masks
        (Optional).
      'groundtruth_keypoints': [num_boxes, num_keypoints, 2] float32 tensor with
        keypoints (Optional).
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.

  Returns:
    A dictionary with:
    'original_image': A [1, H, W, C] uint8 image tensor.
    'key': A string tensor with image identifier.
    'detection_boxes': [max_detections, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [max_detections] float32 tensor of scores.
    'detection_classes': [max_detections] int64 tensor of 1-indexed classes.
    'detection_masks': [max_detections, H, W] float32 tensor of binarized
      masks, reframed to full image masks.
    'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      (Optional)
    'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
    'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 3D int64 tensor of instance masks
      (Optional).
    'groundtruth_keypoints': [num_boxes, num_keypoints, 2] float32 tensor with
      keypoints (Optional).
  """
  max_gt_boxes = None  # Stays None when no groundtruth is provided.
  if groundtruth:
    max_gt_boxes = tf.shape(
        groundtruth[fields.InputDataFields.groundtruth_boxes])[0]
    for gt_key in groundtruth:
      # Expand the groundtruth dict along the batch dimension.
      groundtruth[gt_key] = tf.expand_dims(groundtruth[gt_key], 0)

  for detection_key in detections:
    detections[detection_key] = tf.expand_dims(
        detections[detection_key][0], axis=0)

  batched_output_dict = result_dict_for_batched_example(
      image,
      tf.expand_dims(key, 0),
      detections,
      groundtruth,
      class_agnostic,
      scale_to_absolute,
      max_gt_boxes=max_gt_boxes)

  exclude_keys = [
      fields.InputDataFields.original_image,
      fields.DetectionResultFields.num_detections,
      fields.InputDataFields.num_groundtruth_boxes
  ]

  output_dict = {
      fields.InputDataFields.original_image:
          batched_output_dict[fields.InputDataFields.original_image]
  }

  for key in batched_output_dict:
    # Remove the batch dimension from every field except the image itself.
    if key not in exclude_keys:
      output_dict[key] = tf.squeeze(batched_output_dict[key], 0)
  return output_dict


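# Example usage (a minimal sketch; `detections` is assumed to come from
# DetectionModel.postprocess() and the other names are placeholders):
#
#   eval_dict = result_dict_for_single_example(
#       image, key, detections, groundtruth, scale_to_absolute=True)
#   # eval_dict can then be handed to a DetectionEvaluator or to
#   # get_eval_metric_ops_for_evaluators().

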
def result_dict_for_batched_example(images,
                                    keys,
                                    detections,
                                    groundtruth=None,
                                    class_agnostic=False,
                                    scale_to_absolute=False,
                                    original_image_spatial_shapes=None,
                                    true_image_shapes=None,
                                    max_gt_boxes=None):
  """Merges all detection and groundtruth information for a batch of examples.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.
  The groundtruth coordinates of boxes/keypoints in the 'groundtruth' dictionary
  are normalized relative to the (potentially padded) input image, while the
  coordinates in the 'detections' dictionary are normalized relative to the true
  image shape.

  Args:
    images: A 4D uint8 image tensor of shape [batch_size, H, W, C].
    keys: A [batch_size] string/int tensor with image identifiers.
    detections: A dictionary of detections, returned from
      DetectionModel.postprocess().
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [batch_size, max_number_of_boxes, 4] float32 tensor
        of boxes, in normalized coordinates.
      'groundtruth_classes': [batch_size, max_number_of_boxes] int64 tensor of
        1-indexed classes.
      'groundtruth_area': [batch_size, max_number_of_boxes] float32 tensor of
        bbox area. (Optional)
      'groundtruth_is_crowd': [batch_size, max_number_of_boxes] int64
        tensor. (Optional)
      'groundtruth_difficult': [batch_size, max_number_of_boxes] int64
        tensor. (Optional)
      'groundtruth_group_of': [batch_size, max_number_of_boxes] int64
        tensor. (Optional)
      'groundtruth_instance_masks': 4D int64 tensor of instance
        masks (Optional).
      'groundtruth_keypoints': [batch_size, max_number_of_boxes, num_keypoints,
        2] float32 tensor with keypoints (Optional).
      'groundtruth_keypoint_visibilities': [batch_size, max_number_of_boxes,
        num_keypoints] bool tensor with keypoint visibilities (Optional).
      'groundtruth_labeled_classes': [batch_size, num_classes] int64
        tensor of 1-indexed classes. (Optional)
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.
    original_image_spatial_shapes: A 2D int32 tensor of shape [batch_size, 2]
      used to resize the image. When set to None, the image size is retained.
    true_image_shapes: A 2D int32 tensor of shape [batch_size, 3]
      containing the size of the unpadded original_image.
    max_gt_boxes: [batch_size] tensor representing the maximum number of
      groundtruth boxes to pad.

  Returns:
    A dictionary with:
    'original_image': A [batch_size, H, W, C] uint8 image tensor.
    'original_image_spatial_shape': A [batch_size, 2] tensor containing the
      original image sizes.
    'true_image_shape': A [batch_size, 3] tensor containing the size of
      the unpadded original_image.
    'key': A [batch_size] string tensor with image identifiers.
    'detection_boxes': [batch_size, max_detections, 4] float32 tensor of boxes,
      in normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [batch_size, max_detections] float32 tensor of scores.
    'detection_classes': [batch_size, max_detections] int64 tensor of 1-indexed
      classes.
    'detection_masks': [batch_size, max_detections, H, W] float32 tensor of
      binarized masks, reframed to full image masks. (Optional)
    'detection_keypoints': [batch_size, max_detections, num_keypoints, 2]
      float32 tensor containing keypoint coordinates. (Optional)
    'detection_keypoint_scores': [batch_size, max_detections, num_keypoints]
      float32 tensor containing keypoint scores. (Optional)
    'num_detections': [batch_size] int64 tensor containing number of valid
      detections.
    'groundtruth_boxes': [batch_size, num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [batch_size, num_boxes] int64 tensor of 1-indexed
      classes. (Optional)
    'groundtruth_area': [batch_size, num_boxes] float32 tensor of bbox
      area. (Optional)
    'groundtruth_is_crowd': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [batch_size, num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 4D int64 tensor of instance masks
      (Optional).
    'groundtruth_keypoints': [batch_size, num_boxes, num_keypoints, 2] float32
      tensor with keypoints (Optional).
    'groundtruth_keypoint_visibilities': [batch_size, num_boxes, num_keypoints]
      bool tensor with keypoint visibilities (Optional).
    'groundtruth_labeled_classes': [batch_size, num_classes] int64 tensor
      of 1-indexed classes. (Optional)
    'num_groundtruth_boxes': [batch_size] tensor containing the maximum number
      of groundtruth boxes per image.

  Raises:
    ValueError: if original_image_spatial_shapes is not a 2D int32 tensor of
      shape [batch_size, 2].
    ValueError: if true_image_shapes is not a 2D int32 tensor of shape
      [batch_size, 3].
  """
  label_id_offset = 1  # Applied to convert 0-indexed to 1-indexed classes.

  input_data_fields = fields.InputDataFields
  if original_image_spatial_shapes is None:
    original_image_spatial_shapes = tf.tile(
        tf.expand_dims(tf.shape(images)[1:3], axis=0),
        multiples=[tf.shape(images)[0], 1])
  else:
    if (len(original_image_spatial_shapes.shape) != 2 or
        original_image_spatial_shapes.shape[1] != 2):
      raise ValueError(
          '`original_image_spatial_shape` should be a 2D tensor of shape '
          '[batch_size, 2].')

  if true_image_shapes is None:
    true_image_shapes = tf.tile(
        tf.expand_dims(tf.shape(images)[1:4], axis=0),
        multiples=[tf.shape(images)[0], 1])
  else:
    if (len(true_image_shapes.shape) != 2 or
        true_image_shapes.shape[1] != 3):
      raise ValueError('`true_image_shapes` should be a 2D tensor of '
                       'shape [batch_size, 3].')

  output_dict = {
      input_data_fields.original_image:
          images,
      input_data_fields.key:
          keys,
      input_data_fields.original_image_spatial_shape: (
          original_image_spatial_shapes),
      input_data_fields.true_image_shape:
          true_image_shapes
  }

  detection_fields = fields.DetectionResultFields
  detection_boxes = detections[detection_fields.detection_boxes]
  detection_scores = detections[detection_fields.detection_scores]
  num_detections = tf.cast(detections[detection_fields.num_detections],
                           dtype=tf.int32)

  if class_agnostic:
    detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
  else:
    detection_classes = (
        tf.to_int64(detections[detection_fields.detection_classes]) +
        label_id_offset)

  if scale_to_absolute:
    output_dict[detection_fields.detection_boxes] = (
        shape_utils.static_or_dynamic_map_fn(
            _scale_box_to_absolute,
            elems=[detection_boxes, original_image_spatial_shapes],
            dtype=tf.float32))
  else:
    output_dict[detection_fields.detection_boxes] = detection_boxes
  output_dict[detection_fields.detection_classes] = detection_classes
  output_dict[detection_fields.detection_scores] = detection_scores
  output_dict[detection_fields.num_detections] = num_detections

  if detection_fields.detection_masks in detections:
    detection_masks = detections[detection_fields.detection_masks]
    output_dict[detection_fields.detection_masks] = (
        shape_utils.static_or_dynamic_map_fn(
            _resize_detection_masks,
            elems=[detection_boxes, detection_masks,
                   original_image_spatial_shapes],
            dtype=tf.uint8))

  if detection_fields.detection_keypoints in detections:
    detection_keypoints = detections[detection_fields.detection_keypoints]
    output_dict[detection_fields.detection_keypoints] = detection_keypoints
    if scale_to_absolute:
      output_dict[detection_fields.detection_keypoints] = (
          shape_utils.static_or_dynamic_map_fn(
              _scale_keypoint_to_absolute,
              elems=[detection_keypoints, original_image_spatial_shapes],
              dtype=tf.float32))
    if detection_fields.detection_keypoint_scores in detections:
      output_dict[detection_fields.detection_keypoint_scores] = detections[
          detection_fields.detection_keypoint_scores]
    else:
      output_dict[detection_fields.detection_keypoint_scores] = tf.ones_like(
          detections[detection_fields.detection_keypoints][:, :, :, 0])

  if groundtruth:
    if max_gt_boxes is None:
      if input_data_fields.num_groundtruth_boxes in groundtruth:
        max_gt_boxes = groundtruth[input_data_fields.num_groundtruth_boxes]
      else:
        raise ValueError(
            'max_gt_boxes must be provided when processing batched examples.')

    if input_data_fields.groundtruth_instance_masks in groundtruth:
      masks = groundtruth[input_data_fields.groundtruth_instance_masks]
      groundtruth[input_data_fields.groundtruth_instance_masks] = (
          shape_utils.static_or_dynamic_map_fn(
              _resize_groundtruth_masks,
              elems=[masks, true_image_shapes, original_image_spatial_shapes],
              dtype=tf.uint8))

    output_dict.update(groundtruth)

    image_shape = tf.cast(tf.shape(images), tf.float32)
    image_height, image_width = image_shape[1], image_shape[2]

    def _scale_box_to_normalized_true_image(args):
      """Scale the box coordinates to be relative to the true image shape."""
      boxes, true_image_shape = args
      true_image_shape = tf.cast(true_image_shape, tf.float32)
      true_height, true_width = true_image_shape[0], true_image_shape[1]
      normalized_window = tf.stack([0.0, 0.0, true_height / image_height,
                                    true_width / image_width])
      return box_list_ops.change_coordinate_frame(
          box_list.BoxList(boxes), normalized_window).get()

    groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes]
    groundtruth_boxes = shape_utils.static_or_dynamic_map_fn(
        _scale_box_to_normalized_true_image,
        elems=[groundtruth_boxes, true_image_shapes], dtype=tf.float32)
    output_dict[input_data_fields.groundtruth_boxes] = groundtruth_boxes

    if input_data_fields.groundtruth_keypoints in groundtruth:

      def _scale_keypoints_to_normalized_true_image(args):
        """Scale keypoint coordinates relative to the true image shape."""
        keypoints, true_image_shape = args
        true_image_shape = tf.cast(true_image_shape, tf.float32)
        true_height, true_width = true_image_shape[0], true_image_shape[1]
        normalized_window = tf.stack(
            [0.0, 0.0, true_height / image_height, true_width / image_width])
        return keypoint_ops.change_coordinate_frame(keypoints,
                                                    normalized_window)

      groundtruth_keypoints = groundtruth[
          input_data_fields.groundtruth_keypoints]
      groundtruth_keypoints = shape_utils.static_or_dynamic_map_fn(
          _scale_keypoints_to_normalized_true_image,
          elems=[groundtruth_keypoints, true_image_shapes],
          dtype=tf.float32)
      output_dict[
          input_data_fields.groundtruth_keypoints] = groundtruth_keypoints

    if scale_to_absolute:
      groundtruth_boxes = output_dict[input_data_fields.groundtruth_boxes]
      output_dict[input_data_fields.groundtruth_boxes] = (
          shape_utils.static_or_dynamic_map_fn(
              _scale_box_to_absolute,
              elems=[groundtruth_boxes, original_image_spatial_shapes],
              dtype=tf.float32))
      if input_data_fields.groundtruth_keypoints in groundtruth:
        groundtruth_keypoints = output_dict[
            input_data_fields.groundtruth_keypoints]
        output_dict[input_data_fields.groundtruth_keypoints] = (
            shape_utils.static_or_dynamic_map_fn(
                _scale_keypoint_to_absolute,
                elems=[groundtruth_keypoints, original_image_spatial_shapes],
                dtype=tf.float32))

    if class_agnostic:
      groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes]
      groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64)
      output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes

    output_dict[input_data_fields.num_groundtruth_boxes] = max_gt_boxes

  return output_dict


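# Example usage (a minimal sketch; `images`, `keys`, `detections` and
# `groundtruth` are assumed to follow the contracts documented above):
#
#   eval_dict = result_dict_for_batched_example(
#       images, keys, detections, groundtruth,
#       scale_to_absolute=False, max_gt_boxes=max_gt_boxes)

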
def get_evaluators(eval_config, categories, evaluator_options=None):
  """Returns evaluators for eval_config that are valid for categories.

  Args:
    eval_config: An `eval_pb2.EvalConfig`.
    categories: A list of dicts, each of which has the following keys -
      'id': (required) an integer id uniquely identifying this category.
      'name': (required) string representing category name e.g., 'cat', 'dog'.
      'keypoints': (optional) dict mapping this category's keypoints to unique
        ids.
    evaluator_options: A dictionary of metric names (see
      EVAL_METRICS_CLASS_DICT) to `DetectionEvaluator` initialization
      keyword arguments. For example:
      evaluator_options = {
        'coco_detection_metrics': {'include_metrics_per_category': True}
      }

  Returns:
    A list of instances of DetectionEvaluator.

  Raises:
    ValueError: if metric is not in the metric class dictionary.
  """
  evaluator_options = evaluator_options or {}
  eval_metric_fn_keys = eval_config.metrics_set
  if not eval_metric_fn_keys:
    eval_metric_fn_keys = [EVAL_DEFAULT_METRIC]
  evaluators_list = []
  for eval_metric_fn_key in eval_metric_fn_keys:
    if eval_metric_fn_key not in EVAL_METRICS_CLASS_DICT:
      raise ValueError('Metric not found: {}'.format(eval_metric_fn_key))
    kwargs_dict = evaluator_options.get(eval_metric_fn_key, {})
    evaluators_list.append(EVAL_METRICS_CLASS_DICT[eval_metric_fn_key](
        categories,
        **kwargs_dict))

  if isinstance(eval_config, eval_pb2.EvalConfig):
    parameterized_metrics = eval_config.parameterized_metric
    for parameterized_metric in parameterized_metrics:
      assert parameterized_metric.HasField('parameterized_metric')
      if parameterized_metric.WhichOneof(
          'parameterized_metric') == EVAL_KEYPOINT_METRIC:
        keypoint_metrics = parameterized_metric.coco_keypoint_metrics
        # Find the category entry that matches the requested class label.
        category_keypoints = {}
        class_label = keypoint_metrics.class_label
        category = None
        for cat in categories:
          if cat['name'] == class_label:
            category = cat
            break
        if not category:
          continue
        keypoints_for_this_class = category['keypoints']
        category_keypoints = [{
            'id': keypoints_for_this_class[kp_name], 'name': kp_name
        } for kp_name in keypoints_for_this_class]
        # Create a keypoint evaluator for this category.
        evaluators_list.append(EVAL_METRICS_CLASS_DICT[EVAL_KEYPOINT_METRIC](
            category['id'], category_keypoints, class_label,
            keypoint_metrics.keypoint_label_to_sigmas))
  return evaluators_list


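# Example usage (a minimal sketch; `eval_config` is assumed to be parsed from a
# pipeline config and `categories` from a label map):
#
#   evaluators = get_evaluators(
#       eval_config, categories,
#       evaluator_options={
#           'coco_detection_metrics': {'include_metrics_per_category': True}
#       })

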
def get_eval_metric_ops_for_evaluators(eval_config,
                                       categories,
                                       eval_dict):
  """Returns eval metrics ops to use with `tf.estimator.EstimatorSpec`.

  Args:
    eval_config: An `eval_pb2.EvalConfig`.
    categories: A list of dicts, each of which has the following keys -
      'id': (required) an integer id uniquely identifying this category.
      'name': (required) string representing category name e.g., 'cat', 'dog'.
    eval_dict: An evaluation dictionary, returned from
      result_dict_for_single_example().

  Returns:
    A dictionary of metric names to tuple of value_op and update_op that can be
    used as eval metric ops in tf.estimator.EstimatorSpec.
  """
  eval_metric_ops = {}
  evaluator_options = evaluator_options_from_eval_config(eval_config)
  evaluators_list = get_evaluators(eval_config, categories, evaluator_options)
  for evaluator in evaluators_list:
    eval_metric_ops.update(evaluator.get_estimator_eval_metric_ops(
        eval_dict))
  return eval_metric_ops


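# Example usage inside an Estimator model_fn (a minimal sketch; `eval_dict` is
# assumed to come from result_dict_for_single_example() and `loss` from the
# model being evaluated):
#
#   eval_metric_ops = get_eval_metric_ops_for_evaluators(
#       eval_config, categories, eval_dict)
#   return tf.estimator.EstimatorSpec(
#       mode=tf.estimator.ModeKeys.EVAL, loss=loss,
#       eval_metric_ops=eval_metric_ops)

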
def evaluator_options_from_eval_config(eval_config):
  """Produces a dictionary of evaluation options for each eval metric.

  Args:
    eval_config: An `eval_pb2.EvalConfig`.

  Returns:
    evaluator_options: A dictionary of metric names (see
      EVAL_METRICS_CLASS_DICT) to `DetectionEvaluator` initialization
      keyword arguments. For example:
      evaluator_options = {
        'coco_detection_metrics': {'include_metrics_per_category': True}
      }
  """
  eval_metric_fn_keys = eval_config.metrics_set
  evaluator_options = {}
  for eval_metric_fn_key in eval_metric_fn_keys:
    if eval_metric_fn_key in ('coco_detection_metrics', 'coco_mask_metrics'):
      evaluator_options[eval_metric_fn_key] = {
          'include_metrics_per_category': (
              eval_config.include_metrics_per_category)
      }
    elif eval_metric_fn_key == 'precision_at_recall_detection_metrics':
      evaluator_options[eval_metric_fn_key] = {
          'recall_lower_bound': eval_config.recall_lower_bound,
          'recall_upper_bound': eval_config.recall_upper_bound
      }
  return evaluator_options
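
# Example usage (a minimal sketch, chaining the helpers above):
#
#   options = evaluator_options_from_eval_config(eval_config)
#   evaluators = get_evaluators(eval_config, categories, options)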