File size: 9,339 Bytes
18ddfe2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
syntax = "proto2";

package object_detection.protos;

import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";

// Configuration for Faster R-CNN models.
// See meta_architectures/faster_rcnn_meta_arch.py and models/model_builder.py
//
// Naming conventions:
// Faster R-CNN models have two stages: a first stage region proposal network
// (or RPN) and a second stage box classifier.  We thus use the prefixes
// `first_stage_` and `second_stage_` to indicate the stage to which each
// parameter pertains when relevant.
message FasterRcnn {
  // Whether to construct only the Region Proposal Network (RPN).
  optional int32 number_of_stages = 1 [default = 2];

  // Number of classes to predict.
  optional int32 num_classes = 3;

  // Image resizer for preprocessing the input image.
  optional ImageResizer image_resizer = 4;

  // Feature extractor config.
  optional FasterRcnnFeatureExtractor feature_extractor = 5;

  // (First stage) region proposal network (RPN) parameters.

  // Anchor generator to compute RPN anchors.
  optional AnchorGenerator first_stage_anchor_generator = 6;

  // Atrous rate for the convolution op applied to the
  // `first_stage_features_to_crop` tensor to obtain box predictions.
  optional int32 first_stage_atrous_rate = 7 [default = 1];

  // Hyperparameters for the convolutional RPN box predictor.
  optional Hyperparams first_stage_box_predictor_conv_hyperparams = 8;

  // Kernel size to use for the convolution op just prior to RPN box
  // predictions.
  optional int32 first_stage_box_predictor_kernel_size = 9 [default = 3];

  // Output depth for the convolution op just prior to RPN box predictions.
  optional int32 first_stage_box_predictor_depth = 10 [default = 512];

  // The batch size to use for computing the first stage objectness and
  // location losses.
  optional int32 first_stage_minibatch_size = 11 [default = 256];

  // Fraction of positive examples per image for the RPN.
  optional float first_stage_positive_balance_fraction = 12 [default = 0.5];

  // Non max suppression score threshold applied to first stage RPN proposals.
  optional float first_stage_nms_score_threshold = 13 [default = 0.0];

  // Non max suppression IOU threshold applied to first stage RPN proposals.
  optional float first_stage_nms_iou_threshold = 14 [default = 0.7];

  // Maximum number of RPN proposals retained after first stage postprocessing.
  optional int32 first_stage_max_proposals = 15 [default = 300];

  // First stage RPN localization loss weight.
  optional float first_stage_localization_loss_weight = 16 [default = 1.0];

  // First stage RPN objectness loss weight.
  optional float first_stage_objectness_loss_weight = 17 [default = 1.0];

  // Per-region cropping parameters.
  // Note that if a R-FCN model is constructed the per region cropping
  // parameters below are ignored.

  // Output size (width and height are set to be the same) of the initial
  // bilinear interpolation based cropping during ROI pooling.
  optional int32 initial_crop_size = 18;

  // Kernel size of the max pool op on the cropped feature map during
  // ROI pooling.
  optional int32 maxpool_kernel_size = 19;

  // Stride of the max pool op on the cropped feature map during ROI pooling.
  optional int32 maxpool_stride = 20;

  // (Second stage) box classifier parameters

  // Hyperparameters for the second stage box predictor. If box predictor type
  // is set to rfcn_box_predictor, a R-FCN model is constructed, otherwise a
  // Faster R-CNN model is constructed.
  optional BoxPredictor second_stage_box_predictor = 21;

  // The batch size per image used for computing the classification and refined
  // location loss of the box classifier.
  // Note that this field is ignored if `hard_example_miner` is configured.
  optional int32 second_stage_batch_size = 22 [default = 64];

  // Fraction of positive examples to use per image for the box classifier.
  optional float second_stage_balance_fraction = 23 [default = 0.25];

  // Post processing to apply on the second stage box classifier predictions.
  // Note: the `score_converter` provided to the FasterRCNNMetaArch constructor
  // is taken from this `second_stage_post_processing` proto.
  optional PostProcessing second_stage_post_processing = 24;

  // Second stage refined localization loss weight.
  optional float second_stage_localization_loss_weight = 25 [default = 1.0];

  // Second stage classification loss weight
  optional float second_stage_classification_loss_weight = 26 [default = 1.0];

  // Second stage instance mask loss weight. Note that this is only applicable
  // when `MaskRCNNBoxPredictor` is selected for second stage and configured to
  // predict instance masks.
  optional float second_stage_mask_prediction_loss_weight = 27 [default = 1.0];

  // If not left to default, applies hard example mining only to classification
  // and localization loss..
  optional HardExampleMiner hard_example_miner = 28;

  // Loss for second stage box classifers, supports Softmax and Sigmoid.
  // Note that score converter must be consistent with loss type.
  // When there are multiple labels assigned to the same boxes, recommend
  // to use sigmoid loss and enable merge_multiple_label_boxes.
  // If not specified, Softmax loss is used as default.
  optional ClassificationLoss second_stage_classification_loss = 29;

  // Whether to update batch_norm inplace during training. This is required
  // for batch norm to work correctly on TPUs. When this is false, user must add
  // a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order
  // to update the batch norm moving average parameters.
  optional bool inplace_batchnorm_update = 30 [default = false];

  // Force the use of matrix multiplication based crop and resize instead of
  // standard tf.image.crop_and_resize while computing second stage input
  // feature maps.
  optional bool use_matmul_crop_and_resize = 31 [default = false];

  // Normally, anchors generated for a given image size are pruned during
  // training if they lie outside the image window. Setting this option to true,
  // clips the anchors to be within the image instead of pruning.
  optional bool clip_anchors_to_image = 32 [default = false];

  // After peforming matching between anchors and targets, in order to pull out
  // targets for training Faster R-CNN meta architecture we perform a gather
  // operation. This options specifies whether to use an alternate
  // implementation of tf.gather that is faster on TPUs.
  optional bool use_matmul_gather_in_matcher = 33 [default = false];

  // Whether to use the balanced positive negative sampler implementation with
  // static shape guarantees.
  optional bool use_static_balanced_label_sampler = 34 [default = false];

  // If True, uses implementation of ops with static shape guarantees.
  optional bool use_static_shapes = 35 [default = false];

  // Whether the masks present in groundtruth should be resized in the model to
  // match the image size.
  optional bool resize_masks = 36 [default = true];

  // If True, uses implementation of ops with static shape guarantees when
  // running evaluation (specifically not is_training if False).
  optional bool use_static_shapes_for_eval = 37 [default = false];

  // If true, uses implementation of partitioned_non_max_suppression in first
  // stage.
  optional bool use_partitioned_nms_in_first_stage = 38 [default = true];

  // Whether to return raw detections (pre NMS).
  optional bool return_raw_detections_during_predict = 39 [default = false];

  // Whether to use tf.image.combined_non_max_suppression.
  optional bool use_combined_nms_in_first_stage = 40 [default = false];

  // Whether to output final box feature. If true, it will crop the feature map
  // in the postprocess() method based on the final predictions.
  optional bool output_final_box_features = 42 [default = false];

  // Configs for context model.
  optional Context context_config = 41;
}

message Context {
  // Configuration proto for Context .
  // Next id: 4

  // The maximum number of contextual features per-image, used for padding
  optional int32 max_num_context_features = 1 [default = 2000];

  // The bottleneck feature dimension of the attention block.
  optional int32 attention_bottleneck_dimension = 2 [default = 2048];

  // The attention temperature.
  optional float attention_temperature = 3 [default = 0.01];

  // The context feature length.
  optional int32 context_feature_length = 4 [default = 2057];
}

message FasterRcnnFeatureExtractor {
  // Type of Faster R-CNN model (e.g., 'faster_rcnn_resnet101';
  // See builders/model_builder.py for expected types).
  optional string type = 1;

  // Output stride of extracted RPN feature map.
  optional int32 first_stage_features_stride = 2 [default = 16];

  // Whether to update batch norm parameters during training or not.
  // When training with a relative large batch size (e.g. 8), it could be
  // desirable to enable batch norm update.
  optional bool batch_norm_trainable = 3 [default = false];
}