Spaces:
Sleeping
Sleeping
import numpy as np | |
import tensorflow as tf | |
def detection(
    prediction,
    anchor_boxes,
    num_classes,
    image_shape,
    input_shape,
    max_boxes=20,
    score_threshold=0.3,
    iou_threshold=0.45,
    classes_can_overlap=True,
):
    """Decode YOLO-style output layers into per-image detections.

    Every output layer is decoded into boxes expressed in the pixel
    coordinates of the original image, the layers are merged, and
    non-max suppression (NMS) is run separately on each image of the batch.

    Args:
        prediction: Iterable of output-layer tensors, one per detection
            scale. Each one is reshaped to
            ``[batch, grid_h, grid_w, num_anchors, num_classes + 5]``; the
            last axis is read as ``(tx, ty, tw, th, objectness, class
            logits...)``.
        anchor_boxes: Iterable parallel to ``prediction``; entry ``k`` holds
            the anchors of layer ``k`` as ``(w, h)`` pairs.
            NOTE(review): anchors are multiplied by the same grid-to-pixel
            factor as the cell coordinates, so they are presumably given in
            grid-cell units -- confirm against the caller.
        num_classes: Number of class logits per anchor.
        image_shape: Size of the original image(s) in ``(height, width)``
            order (the code reverses it whenever it is combined with
            ``(x, y)`` values). Either a single pair or one pair per image.
        input_shape: Size of the network input, ``(height, width)``.
            NOTE(review): the padding offset is derived from the longer image
            side, which matches a centred letterbox into a square input --
            confirm before using a non-square input.
        max_boxes: ``max_output_size`` handed to NMS. It applies per class
            when ``classes_can_overlap`` is true, otherwise per image.
        score_threshold: Boxes scoring at or below this value are dropped by
            NMS.
        iou_threshold: IoU above which a lower-scoring box is suppressed.
        classes_can_overlap: If true, class probabilities use a sigmoid and
            NMS is run independently for each class; if false, they use a
            softmax and a single class-agnostic NMS is run per image.

    Returns:
        A list with one tensor per image, each of shape ``[num_kept, 6]``
        with rows ``(x1, y1, x2, y2, score, class_id)``.

    Note:
        The batch is iterated in Python and results are collected in a list,
        so this function is meant to be called eagerly.
    """
    # Decode each output layer, then merge them along the box axis.
    all_boxes = tf.concat(
        [
            _decode_output_layer(
                output,
                anchors,
                num_classes,
                image_shape,
                input_shape,
                classes_can_overlap,
            )
            for output, anchors in zip(prediction, anchor_boxes)
        ],
        axis=1,
    )

    # NMS is applied image by image; the number of surviving boxes differs
    # between images, hence a list rather than a stacked tensor.
    return [
        _nms_single_image(
            image_boxes,
            num_classes,
            max_boxes,
            score_threshold,
            iou_threshold,
            per_class=classes_can_overlap,
        )
        for image_boxes in all_boxes
    ]


def _decode_output_layer(
    output, anchors, num_classes, image_shape, input_shape, classes_can_overlap
):
    """Decode one output layer into ``[batch, num_boxes, 6]`` image-pixel boxes."""
    grid_h, grid_w = output.shape[1:3]
    num_anchors = len(anchors)

    # [batch, grid_h, grid_w, num_anchors, box_params]
    output = tf.reshape(
        output, [-1, grid_h, grid_w, num_anchors, num_classes + 5]
    )
    dtype = output.dtype
    anchors_tensor = tf.constant(anchors, dtype=dtype)

    # (height, width) pairs shaped to broadcast against the 5-D output.
    image_hw = tf.reshape(tf.cast(image_shape, dtype), [-1, 1, 1, 1, 2])
    input_hw = tf.reshape(tf.cast(input_shape, dtype), [-1, 1, 1, 1, 2])
    grid_hw = tf.reshape(tf.cast([grid_h, grid_w], dtype), [-1, 1, 1, 1, 2])

    # Size of the image after the aspect-preserving resize into the input.
    resize_ratio = tf.reduce_min(input_hw / image_hw, axis=-1, keepdims=True)
    resized_hw = tf.round(image_hw * resize_ratio)

    # Factor converting grid units to original-image pixels. It is computed
    # in (h, w) order but applied to (x, y) / (w, h) quantities, so it has to
    # be reversed first; otherwise the rounding of the height axis would be
    # applied to x and vice versa.
    scale_xy = (input_hw * image_hw / resized_hw / grid_hw)[..., ::-1]

    # Padding (in image pixels) added around the shorter side, as (x, y).
    longest_side = tf.reduce_max(image_hw, axis=-1, keepdims=True)
    pad_xy = ((longest_side - image_hw) / 2.)[..., ::-1]

    # Upper bound for box corners, as (width, height).
    image_wh = image_hw[..., ::-1]

    # Cell indices as (x, y) = (column, row): shape [grid_h, grid_w, 1, 2].
    cell_x, cell_y = tf.meshgrid(tf.range(grid_w), tf.range(grid_h))
    cell_xy = tf.cast(tf.stack([cell_x, cell_y], axis=-1), dtype)
    cell_xy = cell_xy[:, :, tf.newaxis, :]

    # Box centres and sizes in original-image pixels.
    box_xy = (tf.sigmoid(output[..., 0:2]) + cell_xy) * scale_xy - pad_xy
    box_wh = tf.exp(output[..., 2:4]) * anchors_tensor * scale_xy

    # Corners, with BOTH corners clamped to the image so that a box lying
    # outside the image cannot yield x1 > x2 or out-of-image coordinates.
    half_wh = box_wh / 2
    box_x1_y1 = tf.minimum(tf.maximum(box_xy - half_wh, 0.), image_wh)
    box_x2_y2 = tf.minimum(tf.maximum(box_xy + half_wh, 0.), image_wh)

    # class probability = objectness * conditional class probability
    class_logits = output[..., 5:]
    if classes_can_overlap:
        conditional_probs = tf.sigmoid(class_logits)
    else:
        conditional_probs = tf.nn.softmax(class_logits)
    class_probs = tf.sigmoid(output[..., 4:5]) * conditional_probs

    # Best class and its score, kept as trailing length-1 axes for concat.
    box_cl = tf.cast(tf.argmax(class_probs, axis=-1), dtype)[..., tf.newaxis]
    box_sc = tf.reduce_max(class_probs, axis=-1, keepdims=True)

    # Rows are [x1, y1, x2, y2, score, class_id]. The box count per layer is
    # static, so the batch axis can stay dynamic (-1).
    boxes = tf.concat([box_x1_y1, box_x2_y2, box_sc, box_cl], axis=-1)
    return tf.reshape(boxes, [-1, grid_h * grid_w * num_anchors, 6])


def _nms_single_image(
    boxes, num_classes, max_boxes, score_threshold, iou_threshold, per_class
):
    """Run NMS on the ``[num_boxes, 6]`` boxes of one image; return kept rows."""
    if not per_class:
        return _nms_select(boxes, max_boxes, score_threshold, iou_threshold)

    # One independent NMS pass per class, results stacked in class order.
    kept = [
        _nms_select(
            boxes[boxes[..., -1] == class_id],
            max_boxes,
            score_threshold,
            iou_threshold,
        )
        for class_id in range(num_classes)
    ]
    return tf.concat(kept, axis=0)


def _nms_select(boxes, max_boxes, score_threshold, iou_threshold):
    """Return the rows of ``boxes`` that survive non-max suppression."""
    # Coordinates are passed as (x1, y1, x2, y2) although the API documents
    # (y1, x1, y2, x2); IoU is unchanged by swapping the axes consistently,
    # so the selection is the same.
    selected = tf.image.non_max_suppression(
        boxes[..., :4],
        boxes[..., -2],
        max_output_size=max_boxes,
        iou_threshold=iou_threshold,
        score_threshold=score_threshold,
    )
    return tf.gather(boxes, selected)