File size: 6,719 Bytes
c9019cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
"""Some special pupropse layers for SSD."""
import keras.backend as K
from keras.engine.topology import InputSpec
from keras.engine.topology import Layer
import numpy as np
import tensorflow as tf
class Normalize(Layer):
"""Normalization layer as described in ParseNet paper.
# Arguments
scale: Default feature scale.
# Input shape
4D tensor with shape:
`(samples, channels, rows, cols)` if dim_ordering='th'
or 4D tensor with shape:
`(samples, rows, cols, channels)` if dim_ordering='tf'.
# Output shape
Same as input
# References
http://cs.unc.edu/~wliu/papers/parsenet.pdf
#TODO
Add possibility to have one scale for all features.
"""
def __init__(self, scale, **kwargs):
if K.image_dim_ordering() == 'tf':
self.axis = 3
else:
self.axis = 1
self.scale = scale
super(Normalize, self).__init__(**kwargs)
def build(self, input_shape):
self.input_spec = [InputSpec(shape=input_shape)]
shape = (input_shape[self.axis],)
init_gamma = self.scale * np.ones(shape)
self.gamma = K.variable(init_gamma, name='{}_gamma'.format(self.name))
self.trainable_weights = [self.gamma]
def call(self, x, mask=None):
output = K.l2_normalize(x, self.axis)
output *= self.gamma
return output
class PriorBox(Layer):
"""Generate the prior boxes of designated sizes and aspect ratios.
# Arguments
img_size: Size of the input image as tuple (w, h).
min_size: Minimum box size in pixels.
max_size: Maximum box size in pixels.
aspect_ratios: List of aspect ratios of boxes.
flip: Whether to consider reverse aspect ratios.
variances: List of variances for x, y, w, h.
clip: Whether to clip the prior's coordinates
such that they are within [0, 1].
# Input shape
4D tensor with shape:
`(samples, channels, rows, cols)` if dim_ordering='th'
or 4D tensor with shape:
`(samples, rows, cols, channels)` if dim_ordering='tf'.
# Output shape
3D tensor with shape:
(samples, num_boxes, 8)
# References
https://arxiv.org/abs/1512.02325
#TODO
Add possibility not to have variances.
Add Theano support
"""
def __init__(self, img_size, min_size, max_size=None, aspect_ratios=None,
flip=True, variances=[0.1], clip=True, **kwargs):
if K.image_dim_ordering() == 'tf':
self.waxis = 2
self.haxis = 1
else:
self.waxis = 3
self.haxis = 2
self.img_size = img_size
if min_size <= 0:
raise Exception('min_size must be positive.')
self.min_size = min_size
self.max_size = max_size
self.aspect_ratios = [1.0]
if max_size:
if max_size < min_size:
raise Exception('max_size must be greater than min_size.')
self.aspect_ratios.append(1.0)
if aspect_ratios:
for ar in aspect_ratios:
if ar in self.aspect_ratios:
continue
self.aspect_ratios.append(ar)
if flip:
self.aspect_ratios.append(1.0 / ar)
self.variances = np.array(variances)
self.clip = True
super(PriorBox, self).__init__(**kwargs)
def compute_output_shape(self, input_shape):
num_priors_ = len(self.aspect_ratios)
layer_width = input_shape[self.waxis]
layer_height = input_shape[self.haxis]
num_boxes = num_priors_ * layer_width * layer_height
return (input_shape[0], num_boxes, 8)
def call(self, x, mask=None):
if hasattr(x, '_keras_shape'):
input_shape = x._keras_shape
elif hasattr(K, 'int_shape'):
input_shape = K.int_shape(x)
layer_width = input_shape[self.waxis]
layer_height = input_shape[self.haxis]
img_width = self.img_size[0]
img_height = self.img_size[1]
# define prior boxes shapes
box_widths = []
box_heights = []
for ar in self.aspect_ratios:
if ar == 1 and len(box_widths) == 0:
box_widths.append(self.min_size)
box_heights.append(self.min_size)
elif ar == 1 and len(box_widths) > 0:
box_widths.append(np.sqrt(self.min_size * self.max_size))
box_heights.append(np.sqrt(self.min_size * self.max_size))
elif ar != 1:
box_widths.append(self.min_size * np.sqrt(ar))
box_heights.append(self.min_size / np.sqrt(ar))
box_widths = 0.5 * np.array(box_widths)
box_heights = 0.5 * np.array(box_heights)
# define centers of prior boxes
step_x = img_width / layer_width
step_y = img_height / layer_height
linx = np.linspace(0.5 * step_x, img_width - 0.5 * step_x,
layer_width)
liny = np.linspace(0.5 * step_y, img_height - 0.5 * step_y,
layer_height)
centers_x, centers_y = np.meshgrid(linx, liny)
centers_x = centers_x.reshape(-1, 1)
centers_y = centers_y.reshape(-1, 1)
# define xmin, ymin, xmax, ymax of prior boxes
num_priors_ = len(self.aspect_ratios)
prior_boxes = np.concatenate((centers_x, centers_y), axis=1)
prior_boxes = np.tile(prior_boxes, (1, 2 * num_priors_))
prior_boxes[:, ::4] -= box_widths
prior_boxes[:, 1::4] -= box_heights
prior_boxes[:, 2::4] += box_widths
prior_boxes[:, 3::4] += box_heights
prior_boxes[:, ::2] /= img_width
prior_boxes[:, 1::2] /= img_height
prior_boxes = prior_boxes.reshape(-1, 4)
if self.clip:
prior_boxes = np.minimum(np.maximum(prior_boxes, 0.0), 1.0)
# define variances
num_boxes = len(prior_boxes)
if len(self.variances) == 1:
variances = np.ones((num_boxes, 4)) * self.variances[0]
elif len(self.variances) == 4:
variances = np.tile(self.variances, (num_boxes, 1))
else:
raise Exception('Must provide one or four variances.')
prior_boxes = np.concatenate((prior_boxes, variances), axis=1)
prior_boxes_tensor = K.expand_dims(K.variable(prior_boxes), 0)
if K.backend() == 'tensorflow':
pattern = [tf.shape(x)[0], 1, 1]
prior_boxes_tensor = tf.tile(prior_boxes_tensor, pattern)
elif K.backend() == 'theano':
#TODO
pass
return prior_boxes_tensor
|