# NOTE: removed non-code web-scrape artifacts (page header, file-size/commit
# metadata, and a dumped line-number gutter) that preceded the source.
import numpy as np
import tensorflow as tf
def detection(
    prediction,
    anchor_boxes,
    num_classes,
    image_shape,
    input_shape,
    max_boxes = 20,
    score_threshold=0.3,
    iou_threshold=0.45,
    classes_can_overlap=True,
):
    '''
    Decode raw YOLO-style network outputs into final, NMS-filtered boxes.

    INPUT:
        prediction       : iterable of per-output-layer tensors, each reshapeable
                           to [batch, grid_h, grid_w, num_anchors, 5+num_classes]
                           holding (tx, ty, tw, th, objectness, class logits).
        anchor_boxes     : iterable of anchor sets, one per output layer; each a
                           list of (w, h) pairs.
                           NOTE(review): both centers and sizes are multiplied by
                           the same grid->image scaling below, so anchors appear
                           to be expected in grid-cell units -- confirm.
        num_classes      : number of object classes.
        image_shape      : original image shape(s) as (height, width).
        input_shape      : network input shape as (height, width).
                           NOTE(review): the padding-offset shortcut below looks
                           valid only for a square input (e.g. 416x416) --
                           confirm for non-square inputs.
        max_boxes        : max boxes kept by each non-max-suppression call.
        score_threshold  : minimum confidence for a box to survive NMS.
        iou_threshold    : IoU above which overlapping boxes are suppressed.
        classes_can_overlap : True  -> sigmoid class probs + per-class NMS;
                              False -> softmax class probs + one global NMS.
    OUTPUT:
        list (one entry per image in the batch) of tensors [num_kept, 6], rows
        [left(x1), top(y1), right(x2), bottom(y2), confidence, class_id] in
        original-image pixel units.

    NOTE(review): iterating `all_boxes` per image and boolean-mask indexing
    below require eager execution -- confirm if this must run in a tf.function.
    '''
    all_boxes = []
    '''@ Each output layer'''
    for output, anchors in zip( prediction, anchor_boxes ):
        '''Preprocessing'''
        '''-------------'''
        # shapes
        batch_size = output.shape[0]
        grid_h, grid_w = output.shape[1:3]
        # reshape to [batch_size, grid_height, grid_width, num_anchors, box_params]
        output = tf.reshape( output, [ -1, grid_h, grid_w, len(anchors), num_classes+5 ] )
        # create a tensor for the anchor boxes
        anchors_tensor = tf.constant(anchors, dtype=output.dtype)
        '''Scaling factors'''
        '''---------------'''
        image_shape_tensor = tf.cast( image_shape, output.dtype )  # actual image's shape
        grids_shape_tensor = tf.cast( output.shape[1:3], output.dtype )  # grid_height, grid_width @ output layer
        input_shape_tensor = tf.cast( input_shape, output.dtype )  # yolo input image's shape
        # reshape so each (h, w) pair broadcasts against
        # [batch, grid_h, grid_w, anchor, 2]
        image_shape_tensor = tf.reshape( image_shape_tensor, [-1, 1, 1, 1, 2] )
        grids_shape_tensor = tf.reshape( grids_shape_tensor, [-1, 1, 1, 1, 2] )
        input_shape_tensor = tf.reshape( input_shape_tensor, [-1, 1, 1, 1, 2] )
        ### Scaling factors
        # letterboxed size: image scaled by min(input/image) so it fits the
        # network input while keeping its aspect ratio
        sized_shape_tensor = tf.round( image_shape_tensor * tf.reshape( tf.reduce_min( input_shape_tensor / image_shape_tensor, axis=-1 ), [-1,1,1,1,1] ) )
        # to scale the boxes from grid's unit to actual image's pixel unit
        box_scaling = input_shape_tensor * image_shape_tensor / sized_shape_tensor / grids_shape_tensor
        # to offset the boxes (undo the letterbox padding), in original-image pixels
        # NOTE(review): (max(image_shape) - image_shape)/2 equals the scaled-back
        # padding (input - sized)/2 * image/sized only when input_shape is
        # square -- confirm for non-square inputs
        box_offsets = (tf.expand_dims(tf.reduce_max(image_shape_tensor, axis=-1), axis=-1) - image_shape_tensor) / 2.
        '''Box geometric properties'''
        '''------------------------'''
        grid_h, grid_w = output.shape[1:3]  # grid_height, grid_width @ output layer
        # build grid_ji so that cell (i, j) holds (j, i): the (x, y) cell offset
        # added to each sigmoid-decoded center
        grid_i = tf.reshape( np.arange(grid_h), [-1, 1, 1, 1] )
        grid_i = tf.tile( grid_i, [1, grid_w, 1, 1] )
        grid_j = tf.reshape( np.arange(grid_w), [1, -1, 1, 1] )
        grid_j = tf.tile( grid_j, [grid_h, 1, 1, 1] )
        grid_ji = tf.concat( [grid_j, grid_i], axis=-1 )
        grid_ji = tf.cast( grid_ji, output.dtype )
        # Box centers: sigmoid(tx, ty) + cell offset, in grid units
        box_xy = output[..., 0:2]
        box_xy = tf.sigmoid( box_xy ) + grid_ji
        # Box sizes: exp(tw, th) * anchor
        box_wh = output[..., 2:4]
        box_wh = tf.exp( box_wh ) * anchors_tensor
        # scale to actual pixel unit; box_offsets is (h, w) ordered while boxes
        # are (x, y) ordered, hence the [..., ::-1] reversal
        box_xy = box_xy * box_scaling - box_offsets[...,::-1]
        box_wh = box_wh * box_scaling
        # calculate top-left corner (x1, y1) and bottom-right corner (x2, y2) of the boxes
        box_x1_y1 = box_xy - box_wh / 2
        box_x2_y2 = box_xy + box_wh / 2
        # clamp: top-left corner cannot be negative
        box_x1_y1 = tf.maximum(0, box_x1_y1)
        # clamp: bottom-right corner cannot be more than actual image size
        # ([..., ::-1] flips (h, w) to (w, h) to match the (x, y) order)
        box_x2_y2 = tf.minimum(box_x2_y2, image_shape_tensor[..., ::-1])
        '''Box labels and confidences'''
        '''--------------------------'''
        # class probabilities = objectness score * conditional class probabilities
        if classes_can_overlap:
            # multi-label case: independent sigmoid per class
            classs_probs = tf.sigmoid( output[..., 4:5] ) * tf.sigmoid( output[..., 5:] )
        else:
            # single-label case: softmax over the class logits
            classs_probs = tf.sigmoid( output[..., 4:5] ) * tf.nn.softmax( output[..., 5:] )
        box_cl = tf.argmax( classs_probs, axis=-1 )  # final classes
        box_sc = tf.reduce_max( classs_probs, axis=-1 )  # confidence scores
        '''Organize'''
        '''--------'''
        # take care of dtype and dimensions (argmax returns int64; concat below
        # needs everything in the output dtype with a trailing size-1 axis)
        box_cl = tf.cast( box_cl, output.dtype )
        box_cl = tf.expand_dims(box_cl, axis=-1)
        box_sc = tf.expand_dims(box_sc, axis=-1)
        # store all information as: [ left(x1), top(y1), right(x2), bottom(y2), confidence, label ]
        boxes = tf.reshape( tf.concat( [ box_x1_y1, box_x2_y2, box_sc, box_cl ], axis=-1 ),
                            [batch_size, -1, 6] )
        all_boxes.append( boxes )
    # Merge across all output layers -> [batch, total_boxes, 6]
    all_boxes = tf.concat( all_boxes, axis=1 )
    # To store all the final results of all images in the batch
    all_final_boxes = []
    '''For each image in the batch'''
    for _boxes_ in all_boxes:
        if classes_can_overlap:
            '''Perform NMS for each class individually'''
            # to store the final results of this image
            final_boxes = []
            for class_id in range(num_classes):
                # Get the boxes and scores for this class (class id is stored
                # as a float in column -1; equality is exact for small ints)
                class_boxes = _boxes_[ _boxes_[...,-1] == class_id ]
                '''Non-max-suppression'''
                # tf.image.non_max_suppression only requires a consistent corner
                # order, so passing (x1, y1, x2, y2) instead of the documented
                # (y1, x1, y2, x2) yields the same IoU and the same selection
                selected_idc = tf.image.non_max_suppression(
                    class_boxes[...,:4],  # boxes' corner coordinates
                    class_boxes[...,-2],  # boxes' scores
                    max_output_size = max_boxes,
                    iou_threshold = iou_threshold,
                    score_threshold = score_threshold
                )
                # boxes selected by nms
                class_boxes = tf.gather( class_boxes, selected_idc )
                final_boxes.append( class_boxes )
            # concatenate the per-class survivors for this image
            final_boxes = tf.concat( final_boxes, axis=0 )
        else:
            '''Perform NMS for all classes at once'''
            # nms indices (corner-order note above applies here too)
            selected_idc = tf.image.non_max_suppression(
                _boxes_[...,:4],  # boxes' corner coordinates
                _boxes_[...,-2],  # boxes' scores
                max_output_size = max_boxes,
                iou_threshold = iou_threshold,
                score_threshold = score_threshold
            )
            # boxes selected by nms
            final_boxes = tf.gather( _boxes_, selected_idc )
        # append final boxes for each image in the batch
        all_final_boxes.append( final_boxes )
    return all_final_boxes