// File-hosting page metadata captured by the scraper (not part of the proto):
//   NCTCMumbai's picture
//   Upload 2583 files
//   18ddfe2 verified
//   raw / history blame
//   9.29 kB
syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
// Configuration for the CenterNet meta architecture from the "Objects as
// Points" paper [1]
// [1]: https://arxiv.org/abs/1904.07850
message CenterNet {
// Number of classes to predict.
optional int32 num_classes = 1;
// Feature extractor config.
optional CenterNetFeatureExtractor feature_extractor = 2;
// Image resizer for preprocessing the input image.
optional ImageResizer image_resizer = 3;
// Parameters which are related to object detection task.
message ObjectDetection {
// The original fields are moved to ObjectCenterParams or deleted.
reserved 2, 5, 6, 7;
// Weight of the task loss. The total loss of the model will be the
// summation of task losses weighted by the weights.
optional float task_loss_weight = 1 [default = 1.0];
// Weight for the offset localization loss.
optional float offset_loss_weight = 3 [default = 1.0];
// Weight for the height/width localization loss.
optional float scale_loss_weight = 4 [default = 0.1];
// Localization loss configuration for object scale and offset losses.
optional LocalizationLoss localization_loss = 8;
}
optional ObjectDetection object_detection_task = 4;
// Parameters related to object center prediction. This is required for both
// object detection and keypoint estimation tasks.
message ObjectCenterParams {
// Weight for the object center loss.
optional float object_center_loss_weight = 1 [default = 1.0];
// Classification loss configuration for object center loss.
optional ClassificationLoss classification_loss = 2;
// The initial bias value of the convolution kernel of the class heatmap
// prediction head. -2.19 corresponds to predicting foreground with
// a probability of 0.1. See "Focal Loss for Dense Object Detection"
// at https://arxiv.org/abs/1708.02002.
optional float heatmap_bias_init = 3 [default = -2.19];
// The minimum IOU overlap boxes need to have to not be penalized.
optional float min_box_overlap_iou = 4 [default = 0.7];
// Maximum number of boxes to predict.
optional int32 max_box_predictions = 5 [default = 100];
// If set, loss is only computed for the labeled classes.
optional bool use_labeled_classes = 6 [default = false];
}
optional ObjectCenterParams object_center_params = 5;
// Path of the file that contains the label map along with the keypoint
// information, including the keypoint indices, corresponding labels, and the
// corresponding class. The file should be the same one as used in the input
// pipeline. Note that a plain text of StringIntLabelMap proto is expected in
// this file.
// It is required only if the keypoint estimation task is specified.
optional string keypoint_label_map_path = 6;
// Parameters which are related to keypoint estimation task.
message KeypointEstimation {
// Name of the task, e.g. "human pose". Note that the task name should be
// unique to each keypoint task.
optional string task_name = 1;
// Weight of the task loss. The total loss of the model will be the
// summation of task losses weighted by the weights.
optional float task_loss_weight = 2 [default = 1.0];
// Loss configuration for keypoint heatmap, offset, regression losses. Note
// that the localization loss is used for offset/regression losses and
// classification loss is used for heatmap loss.
optional Loss loss = 3;
// The name of the class that contains the keypoints for this task. This is
// used to retrieve the corresponding keypoint indices from the label map.
// Note that this corresponds to the "name" field, not "display_name".
optional string keypoint_class_name = 4;
// The standard deviation of the Gaussian kernel used to generate the
// keypoint heatmap. The unit is the pixel in the output image. It is to
// provide the flexibility of using different sizes of Gaussian kernel for
// each keypoint class. Note that if provided, the keypoint standard
// deviations will be overridden by the specified values here, otherwise,
// the default value 5.0 will be used.
// TODO(yuhuic): Update the default value once we find the best value.
map<string, float> keypoint_label_to_std = 5;
// Loss weights corresponding to different heads.
optional float keypoint_regression_loss_weight = 6 [default = 1.0];
optional float keypoint_heatmap_loss_weight = 7 [default = 1.0];
optional float keypoint_offset_loss_weight = 8 [default = 1.0];
// The initial bias value of the convolution kernel of the keypoint heatmap
// prediction head. -2.19 corresponds to predicting foreground with
// a probability of 0.1. See "Focal Loss for Dense Object Detection"
// at https://arxiv.org/abs/1708.02002.
optional float heatmap_bias_init = 9 [default = -2.19];
// The heatmap score threshold for a keypoint to become a valid candidate.
optional float keypoint_candidate_score_threshold = 10 [default = 0.1];
// The maximum number of candidates to retrieve for each keypoint.
optional int32 num_candidates_per_keypoint = 11 [default = 100];
// Max pool kernel size to use to pull off peak score locations in a
// neighborhood (independently for each keypoint type).
optional int32 peak_max_pool_kernel_size = 12 [default = 3];
// The default score to use for regressed keypoints that are not
// successfully snapped to a nearby candidate.
optional float unmatched_keypoint_score = 13 [default = 0.1];
// The multiplier to expand the bounding boxes (either the provided boxes or
// those which tightly cover the regressed keypoints). Note that the new
// expanded box for an instance becomes the feasible search window for all
// associated keypoints.
optional float box_scale = 14 [default = 1.2];
// The scale parameter that multiplies the largest dimension of a bounding
// box. The resulting distance becomes a search radius for candidates in the
// vicinity of each regressed keypoint.
optional float candidate_search_scale = 15 [default = 0.3];
// One of ['min_distance', 'score_distance_ratio'] indicating how to select
// the keypoint candidate.
optional string candidate_ranking_mode = 16 [default = "min_distance"];
// The radius (in the unit of output pixel) around heatmap peak to assign
// the offset targets. If set to 0, then the offset target will only be
// assigned to the heatmap peak (same behavior as the original paper).
optional int32 offset_peak_radius = 17 [default = 0];
// Indicates whether to assign offsets for each keypoint channel
// separately. If set False, the output offset target has the shape
// [batch_size, out_height, out_width, 2] (same behavior as the original
// paper). If set True, the output offset target has the shape [batch_size,
// out_height, out_width, 2 * num_keypoints] (recommended when the
// offset_peak_radius is not zero).
optional bool per_keypoint_offset = 18 [default = false];
}
repeated KeypointEstimation keypoint_estimation_task = 7;
// Parameters which are related to mask estimation task.
// Note: Currently, CenterNet supports a weak instance segmentation, where
// semantic segmentation masks are estimated, and then cropped based on
// bounding box detections. Therefore, it is possible for the same image
// pixel to be assigned to multiple instances.
message MaskEstimation {
// Weight of the task loss. The total loss of the model will be the
// summation of task losses weighted by the weights.
optional float task_loss_weight = 1 [default = 1.0];
// Classification loss configuration for segmentation loss.
optional ClassificationLoss classification_loss = 2;
// Each instance mask (one per detection) is cropped and resized (bilinear
// resampling) from the predicted segmentation feature map. After
// resampling, the masks are binarized with the provided score threshold.
optional int32 mask_height = 4 [default = 256];
optional int32 mask_width = 5 [default = 256];
optional float score_threshold = 6 [default = 0.5];
// The initial bias value of the convolution kernel of the class heatmap
// prediction head. -2.19 corresponds to predicting foreground with
// a probability of 0.1.
// NOTE(review): field number 3 is declared after 4-6; declaration order is
// irrelevant on the wire, so it is kept as-is to avoid churn.
optional float heatmap_bias_init = 3 [default = -2.19];
}
optional MaskEstimation mask_estimation_task = 8;
}
message CenterNetFeatureExtractor {
  // Identifier of the feature extractor implementation to build; the set of
  // valid values is resolved by the model builder (not visible in this file).
  optional string type = 1;

  // Per-channel mean subtracted from the corresponding input image channel.
  // When unset, a mean of 0 is used for every channel.
  repeated float channel_means = 2;

  // Per-channel standard deviation; each input channel is normalized by
  // dividing by its value. When unset, a deviation of 1 is used for every
  // channel.
  repeated float channel_stds = 3;

  // If true, reorder the input channels to [blue, green, red]. Useful for
  // compatibility with feature extractors pre-trained on BGR input.
  optional bool bgr_ordering = 4 [default = false];
}