// This schema uses proto2 syntax (explicit `optional` labels and per-field
// defaults).
syntax = "proto2";

package object_detection.protos;

import "object_detection/protos/anchor_generator.proto";
import "object_detection/protos/box_predictor.proto";
import "object_detection/protos/hyperparams.proto";
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
import "object_detection/protos/post_processing.proto";
// Configuration for Faster R-CNN models.
// See meta_architectures/ and models/
//
// Naming conventions:
// Faster R-CNN models have two stages: a first stage region proposal network
// (or RPN) and a second stage box classifier. We thus use the prefixes
// `first_stage_` and `second_stage_` to indicate the stage to which each
// parameter pertains when relevant.
//
// Field numbers are not declared in strictly increasing order (42 is declared
// before 41); the highest number in use is 42.
// Next id: 43
message FasterRcnn {
  // Field number 2 is not assigned to any field of this message. It is
  // reserved so that it cannot be picked up by accident.
  // NOTE(review): presumably it belonged to a since-removed field -- confirm
  // against the schema history.
  reserved 2;

  // Number of stages of the model to construct. Defaults to 2, matching the
  // two stages (RPN and box classifier) described above.
  // NOTE(review): this field was previously documented as "whether to
  // construct only the Region Proposal Network (RPN)", so a value of 1
  // presumably builds the RPN alone -- confirm against the model builder.
  optional int32 number_of_stages = 1 [default = 2];

  // Number of classes to predict.
  optional int32 num_classes = 3;

  // Image resizer for preprocessing the input image.
  optional ImageResizer image_resizer = 4;

  // Feature extractor config.
  optional FasterRcnnFeatureExtractor feature_extractor = 5;

  // (First stage) region proposal network (RPN) parameters.

  // Anchor generator to compute RPN anchors.
  optional AnchorGenerator first_stage_anchor_generator = 6;

  // Atrous rate for the convolution op applied to the
  // `first_stage_features_to_crop` tensor to obtain box predictions.
  optional int32 first_stage_atrous_rate = 7 [default = 1];

  // Hyperparameters for the convolutional RPN box predictor.
  optional Hyperparams first_stage_box_predictor_conv_hyperparams = 8;

  // Kernel size to use for the convolution op just prior to RPN box
  // predictions.
  optional int32 first_stage_box_predictor_kernel_size = 9 [default = 3];

  // Output depth for the convolution op just prior to RPN box predictions.
  optional int32 first_stage_box_predictor_depth = 10 [default = 512];

  // The batch size to use for computing the first stage objectness and
  // location losses.
  optional int32 first_stage_minibatch_size = 11 [default = 256];

  // Fraction of positive examples per image for the RPN.
  optional float first_stage_positive_balance_fraction = 12 [default = 0.5];

  // Non max suppression score threshold applied to first stage RPN proposals.
  optional float first_stage_nms_score_threshold = 13 [default = 0.0];

  // Non max suppression IOU threshold applied to first stage RPN proposals.
  optional float first_stage_nms_iou_threshold = 14 [default = 0.7];

  // Maximum number of RPN proposals retained after first stage postprocessing.
  optional int32 first_stage_max_proposals = 15 [default = 300];

  // First stage RPN localization loss weight.
  optional float first_stage_localization_loss_weight = 16 [default = 1.0];

  // First stage RPN objectness loss weight.
  optional float first_stage_objectness_loss_weight = 17 [default = 1.0];

  // Per-region cropping parameters.
  // Note that if a R-FCN model is constructed the per region cropping
  // parameters below are ignored.

  // Output size (width and height are set to be the same) of the initial
  // bilinear interpolation based cropping during ROI pooling.
  optional int32 initial_crop_size = 18;

  // Kernel size of the max pool op on the cropped feature map during
  // ROI pooling.
  optional int32 maxpool_kernel_size = 19;

  // Stride of the max pool op on the cropped feature map during ROI pooling.
  optional int32 maxpool_stride = 20;

  // (Second stage) box classifier parameters.

  // Hyperparameters for the second stage box predictor. If box predictor type
  // is set to rfcn_box_predictor, a R-FCN model is constructed, otherwise a
  // Faster R-CNN model is constructed.
  optional BoxPredictor second_stage_box_predictor = 21;

  // The batch size per image used for computing the classification and refined
  // location loss of the box classifier.
  // Note that this field is ignored if `hard_example_miner` is configured.
  optional int32 second_stage_batch_size = 22 [default = 64];

  // Fraction of positive examples to use per image for the box classifier.
  optional float second_stage_balance_fraction = 23 [default = 0.25];

  // Post processing to apply on the second stage box classifier predictions.
  // Note: the `score_converter` provided to the FasterRCNNMetaArch constructor
  // is taken from this `second_stage_post_processing` proto.
  optional PostProcessing second_stage_post_processing = 24;

  // Second stage refined localization loss weight.
  optional float second_stage_localization_loss_weight = 25 [default = 1.0];

  // Second stage classification loss weight.
  optional float second_stage_classification_loss_weight = 26 [default = 1.0];

  // Second stage instance mask loss weight. Note that this is only applicable
  // when `MaskRCNNBoxPredictor` is selected for second stage and configured to
  // predict instance masks.
  optional float second_stage_mask_prediction_loss_weight = 27 [default = 1.0];

  // If not left to default, applies hard example mining only to classification
  // and localization loss.
  optional HardExampleMiner hard_example_miner = 28;

  // Loss for second stage box classifiers, supports Softmax and Sigmoid.
  // Note that score converter must be consistent with loss type.
  // When there are multiple labels assigned to the same boxes, it is
  // recommended to use sigmoid loss and enable merge_multiple_label_boxes.
  // If not specified, Softmax loss is used as default.
  optional ClassificationLoss second_stage_classification_loss = 29;

  // Whether to update batch_norm inplace during training. This is required
  // for batch norm to work correctly on TPUs. When this is false, user must add
  // a control dependency on tf.GraphKeys.UPDATE_OPS for train/loss op in order
  // to update the batch norm moving average parameters.
  optional bool inplace_batchnorm_update = 30 [default = false];

  // Force the use of matrix multiplication based crop and resize instead of
  // standard tf.image.crop_and_resize while computing second stage input
  // feature maps.
  optional bool use_matmul_crop_and_resize = 31 [default = false];

  // Normally, anchors generated for a given image size are pruned during
  // training if they lie outside the image window. Setting this option to true
  // clips the anchors to be within the image instead of pruning.
  optional bool clip_anchors_to_image = 32 [default = false];

  // After performing matching between anchors and targets, in order to pull
  // out targets for training Faster R-CNN meta architecture we perform a gather
  // operation. This option specifies whether to use an alternate
  // implementation of tf.gather that is faster on TPUs.
  optional bool use_matmul_gather_in_matcher = 33 [default = false];

  // Whether to use the balanced positive negative sampler implementation with
  // static shape guarantees.
  optional bool use_static_balanced_label_sampler = 34 [default = false];

  // If true, uses implementation of ops with static shape guarantees.
  optional bool use_static_shapes = 35 [default = false];

  // Whether the masks present in groundtruth should be resized in the model to
  // match the image size.
  optional bool resize_masks = 36 [default = true];

  // If true, uses implementation of ops with static shape guarantees when
  // running evaluation (that is, when the model is not in training mode).
  optional bool use_static_shapes_for_eval = 37 [default = false];

  // If true, uses implementation of partitioned_non_max_suppression in first
  // stage.
  optional bool use_partitioned_nms_in_first_stage = 38 [default = true];

  // Whether to return raw detections (pre NMS).
  optional bool return_raw_detections_during_predict = 39 [default = false];

  // Whether to use tf.image.combined_non_max_suppression.
  optional bool use_combined_nms_in_first_stage = 40 [default = false];

  // Whether to output final box feature. If true, it will crop the feature map
  // in the postprocess() method based on the final predictions.
  optional bool output_final_box_features = 42 [default = false];

  // Configs for context model.
  optional Context context_config = 41;
}
// Configuration proto for the context model (referenced by
// `FasterRcnn.context_config`).
// Next id: 5
message Context {
  // The maximum number of contextual features per image, used for padding.
  optional int32 max_num_context_features = 1 [default = 2000];

  // The bottleneck feature dimension of the attention block.
  optional int32 attention_bottleneck_dimension = 2 [default = 2048];

  // The attention temperature.
  optional float attention_temperature = 3 [default = 0.01];

  // The context feature length.
  optional int32 context_feature_length = 4 [default = 2057];
}
// Feature extractor configuration for Faster R-CNN models (referenced by
// `FasterRcnn.feature_extractor`).
message FasterRcnnFeatureExtractor {
  // Type of Faster R-CNN model (e.g., 'faster_rcnn_resnet101';
  // see builders/ for expected types).
  optional string type = 1;

  // Output stride of extracted RPN feature map.
  optional int32 first_stage_features_stride = 2 [default = 16];

  // Whether to update batch norm parameters during training or not.
  // When training with a relatively large batch size (e.g. 8), it could be
  // desirable to enable batch norm update.
  optional bool batch_norm_trainable = 3 [default = false];
}