syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/hyperparams.proto";
// Configuration proto for box predictor. See core/box_predictor.py for details.
message BoxPredictor {
  oneof box_predictor_oneof {
    ConvolutionalBoxPredictor convolutional_box_predictor = 1;
    MaskRCNNBoxPredictor mask_rcnn_box_predictor = 2;
    RfcnBoxPredictor rfcn_box_predictor = 3;
    WeightSharedConvolutionalBoxPredictor
        weight_shared_convolutional_box_predictor = 4;
  }
}
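
// Exactly one of the box predictor messages below may be set via the oneof
// above. A minimal illustrative sketch in pipeline text format (the nested
// contents are elided and every choice here is an assumption, not a
// recommendation):
//
//   box_predictor {
//     convolutional_box_predictor {
//       conv_hyperparams { ... }
//     }
//   }
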
// Configuration proto for Convolutional box predictor.
// Next id: 13
message ConvolutionalBoxPredictor {
  // Hyperparameters for convolution ops used in the box predictor.
  optional Hyperparams conv_hyperparams = 1;

  // Minimum feature depth prior to predicting box encodings and class
  // predictions.
  optional int32 min_depth = 2 [default = 0];

  // Maximum feature depth prior to predicting box encodings and class
  // predictions. If max_depth is set to 0, no additional feature map will be
  // inserted before location and class predictions.
  optional int32 max_depth = 3 [default = 0];

  // Number of additional conv layers added before the predictor.
  optional int32 num_layers_before_predictor = 4 [default = 0];

  // Whether to use dropout for class prediction.
  optional bool use_dropout = 5 [default = true];

  // Keep probability for dropout.
  optional float dropout_keep_probability = 6 [default = 0.8];

  // Size of final convolution kernel. If the spatial resolution of the feature
  // map is smaller than the kernel size, then the kernel size is set to
  // min(feature_width, feature_height).
  optional int32 kernel_size = 7 [default = 1];

  // Size of the encoding for boxes.
  optional int32 box_code_size = 8 [default = 4];

  // Whether to apply sigmoid to the output of class predictions.
  // TODO(jonathanhuang): Do we need this since we have a post-processing
  // module?
  optional bool apply_sigmoid_to_scores = 9 [default = false];

  // Bias initialization for class prediction.
  optional float class_prediction_bias_init = 10 [default = 0.0];

  // Whether to use depthwise separable convolution for box predictor layers.
  optional bool use_depthwise = 11 [default = false];

  // If specified, apply clipping to box encodings.
  message BoxEncodingsClipRange {
    optional float min = 1;
    optional float max = 2;
  }
  optional BoxEncodingsClipRange box_encodings_clip_range = 12;
}
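
// A hedged example of this message in pipeline text format. All values below
// are illustrative assumptions, not recommended settings:
//
//   convolutional_box_predictor {
//     conv_hyperparams { ... }
//     min_depth: 0
//     max_depth: 0
//     num_layers_before_predictor: 0
//     use_dropout: false
//     dropout_keep_probability: 0.8
//     kernel_size: 3
//     box_code_size: 4
//   }
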
// Configuration proto for weight shared convolutional box predictor.
// Next id: 19
message WeightSharedConvolutionalBoxPredictor {
  // Hyperparameters for convolution ops used in the box predictor.
  optional Hyperparams conv_hyperparams = 1;

  // Number of additional conv layers added before the predictor.
  optional int32 num_layers_before_predictor = 4 [default = 0];

  // Output depth for the convolution ops prior to predicting box encodings
  // and class predictions.
  optional int32 depth = 2 [default = 0];

  // Size of final convolution kernel. If the spatial resolution of the feature
  // map is smaller than the kernel size, then the kernel size is set to
  // min(feature_width, feature_height).
  optional int32 kernel_size = 7 [default = 3];

  // Size of the encoding for boxes.
  optional int32 box_code_size = 8 [default = 4];

  // Bias initialization for class prediction. It has been shown to stabilize
  // training when there is a large number of negative boxes. See
  // https://arxiv.org/abs/1708.02002 for details.
  optional float class_prediction_bias_init = 10 [default = 0.0];

  // Whether to use dropout for class prediction.
  optional bool use_dropout = 11 [default = false];

  // Keep probability for dropout.
  optional float dropout_keep_probability = 12 [default = 0.8];

  // Whether to share the multi-layer tower between box prediction and class
  // prediction heads.
  optional bool share_prediction_tower = 13 [default = false];

  // Whether to use depthwise separable convolution for box predictor layers.
  optional bool use_depthwise = 14 [default = false];

  // Enum to specify how to convert the detection scores at inference time.
  enum ScoreConverter {
    // Input scores equal output scores.
    IDENTITY = 0;

    // Applies a sigmoid on input scores.
    SIGMOID = 1;
  }

  // Callable elementwise score converter at inference time.
  optional ScoreConverter score_converter = 16 [default = IDENTITY];

  // If specified, apply clipping to box encodings.
  message BoxEncodingsClipRange {
    optional float min = 1;
    optional float max = 2;
  }
  optional BoxEncodingsClipRange box_encodings_clip_range = 17;
}
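
// A hedged example of this message in pipeline text format. All values are
// illustrative assumptions; class_prediction_bias_init of -4.6 roughly
// corresponds to the rare-foreground prior discussed in the paper linked
// above, but is not a prescribed setting:
//
//   weight_shared_convolutional_box_predictor {
//     conv_hyperparams { ... }
//     depth: 256
//     num_layers_before_predictor: 4
//     kernel_size: 3
//     class_prediction_bias_init: -4.6
//     share_prediction_tower: false
//     score_converter: SIGMOID
//   }
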
// TODO(alirezafathi): Refactor the proto file to be able to configure mask rcnn
// head easily.
// Next id: 15
message MaskRCNNBoxPredictor {
  // Hyperparameters for fully connected ops used in the box predictor.
  optional Hyperparams fc_hyperparams = 1;

  // Whether to use a dropout op prior to both the box and class predictions.
  optional bool use_dropout = 2 [default = false];

  // Keep probability for dropout. This is only used if use_dropout is true.
  optional float dropout_keep_probability = 3 [default = 0.5];

  // Size of the encoding for the boxes.
  optional int32 box_code_size = 4 [default = 4];

  // Hyperparameters for convolution ops used in the box predictor.
  optional Hyperparams conv_hyperparams = 5;

  // Whether to predict instance masks inside detection boxes.
  optional bool predict_instance_masks = 6 [default = false];

  // The depth for the first conv2d_transpose op applied to the
  // image_features in the mask prediction branch. If set to 0, the value
  // will be set automatically based on the number of channels in the image
  // features and the number of classes.
  optional int32 mask_prediction_conv_depth = 7 [default = 256];

  // Whether to predict keypoints inside detection boxes.
  optional bool predict_keypoints = 8 [default = false];

  // The height and the width of the predicted mask.
  optional int32 mask_height = 9 [default = 15];
  optional int32 mask_width = 10 [default = 15];

  // The number of convolutions applied to image_features in the mask
  // prediction branch.
  optional int32 mask_prediction_num_conv_layers = 11 [default = 2];

  // Whether to predict a single, class-agnostic mask shared across all
  // classes rather than one mask per class.
  optional bool masks_are_class_agnostic = 12 [default = false];

  // Whether to use one box for all classes rather than a different box for
  // each class.
  optional bool share_box_across_classes = 13 [default = false];

  // Whether to apply convolutions on mask features before upsampling using
  // nearest neighbor resizing.
  // By default, mask features are resized to [`mask_height`, `mask_width`]
  // before applying convolutions and predicting masks.
  optional bool convolve_then_upsample_masks = 14 [default = false];
}
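
// A hedged example of this message in pipeline text format, with instance
// mask prediction enabled. All values are illustrative assumptions:
//
//   mask_rcnn_box_predictor {
//     fc_hyperparams { ... }
//     conv_hyperparams { ... }
//     use_dropout: false
//     dropout_keep_probability: 0.5
//     predict_instance_masks: true
//     mask_height: 33
//     mask_width: 33
//     mask_prediction_num_conv_layers: 4
//   }
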
// Configuration proto for RFCN box predictor.
message RfcnBoxPredictor {
  // Hyperparameters for convolution ops used in the box predictor.
  optional Hyperparams conv_hyperparams = 1;

  // Bin sizes for RFCN crops.
  optional int32 num_spatial_bins_height = 2 [default = 3];
  optional int32 num_spatial_bins_width = 3 [default = 3];

  // Target depth to reduce the input image features to.
  optional int32 depth = 4 [default = 1024];

  // Size of the encoding for the boxes.
  optional int32 box_code_size = 5 [default = 4];

  // Size to resize the rfcn crops to.
  optional int32 crop_height = 6 [default = 12];
  optional int32 crop_width = 7 [default = 12];
}
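
// A hedged example of this message in pipeline text format, e.g. inside an
// R-FCN style configuration. All values are illustrative assumptions:
//
//   rfcn_box_predictor {
//     conv_hyperparams { ... }
//     num_spatial_bins_height: 3
//     num_spatial_bins_width: 3
//     depth: 1024
//     crop_height: 18
//     crop_width: 18
//   }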