# nooneshouldtouch's picture
# ok
# 1f85df6
import numpy as np
import tensorflow as tf
def _decode_output_layer(
    output,
    anchors,
    num_classes,
    image_shape,
    input_shape,
    classes_can_overlap,
):
    '''Decode one raw output layer into clipped boxes in image pixel units.

    Returns a tensor reshaped to [batch_size, -1, 6] whose last axis is
    [x1, y1, x2, y2, confidence, label].
    '''
    # Read the static shapes before the tensor is reshaped.
    batch_size = output.shape[0]
    grid_h, grid_w = output.shape[1:3]

    # [batch_size, grid_height, grid_width, num_anchors, box_params]
    output = tf.reshape(
        output, [-1, grid_h, grid_w, len(anchors), num_classes + 5]
    )
    dtype = output.dtype
    anchors_tensor = tf.constant(anchors, dtype=dtype)

    # ---- Scaling factors (all built in (height, width) order) ----
    image_hw = tf.reshape(tf.cast(image_shape, dtype), [-1, 1, 1, 1, 2])
    grid_hw = tf.reshape(tf.cast(output.shape[1:3], dtype), [-1, 1, 1, 1, 2])
    input_hw = tf.reshape(tf.cast(input_shape, dtype), [-1, 1, 1, 1, 2])

    # Size of the image after the aspect-preserving resize into the input.
    resize_ratio = tf.reshape(
        tf.reduce_min(input_hw / image_hw, axis=-1), [-1, 1, 1, 1, 1]
    )
    resized_hw = tf.round(image_hw * resize_ratio)

    # Grid units -> image pixels.
    scale_hw = input_hw * image_hw / resized_hw / grid_hw
    # Padding removed from each side, in image pixels.
    # NOTE(review): this formula only matches a centred letterbox into a
    # square network input -- confirm against the preprocessing code.
    offset_hw = (
        tf.expand_dims(tf.reduce_max(image_hw, axis=-1), axis=-1) - image_hw
    ) / 2.

    # Box coordinates are (x, y) / (w, h), so every (height, width) factor is
    # reversed before use. The scale factor previously was not, which applied
    # the vertical scale to x and the horizontal scale to y.
    scale_xy = scale_hw[..., ::-1]
    offset_xy = offset_hw[..., ::-1]
    image_wh = image_hw[..., ::-1]

    # ---- Box geometry ----
    # Per-cell (column, row) indices, shape [grid_h, grid_w, 1, 2].
    rows = tf.tile(
        tf.reshape(np.arange(grid_h), [-1, 1, 1, 1]), [1, grid_w, 1, 1]
    )
    cols = tf.tile(
        tf.reshape(np.arange(grid_w), [1, -1, 1, 1]), [grid_h, 1, 1, 1]
    )
    cell_xy = tf.cast(tf.concat([cols, rows], axis=-1), dtype)

    # Centres and sizes in grid units, then converted to image pixels.
    box_xy = (tf.sigmoid(output[..., 0:2]) + cell_xy) * scale_xy - offset_xy
    box_wh = tf.exp(output[..., 2:4]) * anchors_tensor * scale_xy

    # Top-left (x1, y1) and bottom-right (x2, y2) corners.
    top_left = box_xy - box_wh / 2
    bottom_right = box_xy + box_wh / 2
    # Clamp both corners to the image on both sides so that a box lying
    # completely outside the image collapses to zero area instead of ending
    # up with x1 > x2 or y1 > y2.
    top_left = tf.minimum(tf.maximum(top_left, 0.0), image_wh)
    bottom_right = tf.minimum(tf.maximum(bottom_right, 0.0), image_wh)

    # ---- Labels and confidences ----
    # class probability = objectness * conditional class probability
    objectness = tf.sigmoid(output[..., 4:5])
    class_logits = output[..., 5:]
    if classes_can_overlap:
        # independent (multi-label) classes -> sigmoid
        conditional = tf.sigmoid(class_logits)
    else:
        # mutually exclusive classes -> softmax
        conditional = tf.nn.softmax(class_logits)
    class_probs = objectness * conditional

    labels = tf.cast(tf.argmax(class_probs, axis=-1), dtype)
    scores = tf.reduce_max(class_probs, axis=-1)

    # [left(x1), top(y1), right(x2), bottom(y2), confidence, label]
    boxes = tf.concat(
        [
            top_left,
            bottom_right,
            tf.expand_dims(scores, axis=-1),
            tf.expand_dims(labels, axis=-1),
        ],
        axis=-1,
    )
    return tf.reshape(boxes, [batch_size, -1, 6])


def _suppress_non_max(boxes, max_boxes, iou_threshold, score_threshold):
    '''Run non-max suppression on [N, 6] boxes and return the kept rows.'''
    # Rows are stored as (x1, y1, x2, y2) while tf.image.non_max_suppression
    # documents (y1, x1, y2, x2). Swapping the two axes consistently leaves
    # every IoU unchanged, so the selection is the same.
    keep = tf.image.non_max_suppression(
        boxes[..., :4],
        boxes[..., -2],
        max_output_size=max_boxes,
        iou_threshold=iou_threshold,
        score_threshold=score_threshold,
    )
    return tf.gather(boxes, keep)


def detection(
    prediction,
    anchor_boxes,
    num_classes,
    image_shape,
    input_shape,
    max_boxes = 20,
    score_threshold=0.3,
    iou_threshold=0.45,
    classes_can_overlap=True,
):
    '''Turn raw multi-scale detector outputs into per-image detections.

    Every output layer is decoded into boxes in original-image pixel units,
    the layers are merged, and non-max suppression is run for each image.

    INPUT:
        prediction: iterable of raw output tensors, one per output layer.
            Axes 0, 1 and 2 are read as batch, grid height and grid width;
            each tensor is reshaped to
            [batch, grid_h, grid_w, num_anchors, num_classes + 5], with the
            last axis laid out as [x, y, w, h, objectness, class logits...].
        anchor_boxes: iterable, parallel to `prediction`, holding the anchor
            sizes of each layer. Presumably (w, h) pairs expressed in that
            layer's grid units, since they are multiplied by the
            grid-to-image scale -- TODO confirm against the caller.
        num_classes: number of class logits per anchor.
        image_shape: (height, width) of the original image; anything that
            reshapes to [-1, 1, 1, 1, 2] is accepted.
        input_shape: (height, width) of the network input.
        max_boxes: `max_output_size` of each non-max-suppression call. It
            applies per class when `classes_can_overlap` is true, and per
            image otherwise.
        score_threshold: boxes scoring at or below this are dropped by NMS.
        iou_threshold: IoU above which a lower-scoring box is suppressed.
        classes_can_overlap: if true, class probabilities use a sigmoid and
            NMS runs separately for each class; otherwise they use a softmax
            and a single NMS covers all classes.

    OUTPUT:
        A list with one tensor per image in the batch. Each tensor has shape
        [num_kept, 6] with rows
        [left(x1), top(y1), right(x2), bottom(y2), confidence, label].

    The batch axis is iterated with a Python loop, so this is presumably
    meant to run eagerly.
    '''
    # Decode every output layer and merge them along the box axis.
    all_boxes = tf.concat(
        [
            _decode_output_layer(
                output,
                anchors,
                num_classes,
                image_shape,
                input_shape,
                classes_can_overlap,
            )
            for output, anchors in zip(prediction, anchor_boxes)
        ],
        axis=1,
    )

    # Final results for every image in the batch.
    all_final_boxes = []
    for image_boxes in all_boxes:
        if classes_can_overlap:
            # NMS for each class individually, then stack the survivors.
            per_class = [
                _suppress_non_max(
                    tf.boolean_mask(
                        image_boxes, tf.equal(image_boxes[..., -1], class_id)
                    ),
                    max_boxes,
                    iou_threshold,
                    score_threshold,
                )
                for class_id in range(num_classes)
            ]
            final_boxes = tf.concat(per_class, axis=0)
        else:
            # One NMS across all classes.
            final_boxes = _suppress_non_max(
                image_boxes, max_boxes, iou_threshold, score_threshold
            )
        all_final_boxes.append(final_boxes)
    return all_final_boxes