syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
// Configuration for the CenterNet meta architecture from the "Objects as
// Points" paper [1]
// [1]: https://arxiv.org/abs/1904.07850
message CenterNet {
// Number of classes to predict.
optional int32 num_classes = 1;
// Feature extractor config.
optional CenterNetFeatureExtractor feature_extractor = 2;
// Image resizer for preprocessing the input image.
optional ImageResizer image_resizer = 3;
// Parameters which are related to the object detection task.
message ObjectDetection {
// The original fields are moved to ObjectCenterParams or deleted.
reserved 2, 5, 6, 7;
// Weight of the task loss. The total loss of the model will be the
// summation of task losses weighted by the weights.
optional float task_loss_weight = 1 [default = 1.0];
// Weight for the offset localization loss.
optional float offset_loss_weight = 3 [default = 1.0];
// Weight for the height/width localization loss.
optional float scale_loss_weight = 4 [default = 0.1];
// Localization loss configuration for object scale and offset losses.
optional LocalizationLoss localization_loss = 8;
}
optional ObjectDetection object_detection_task = 4;
// Parameters related to object center prediction. This is required for both
// object detection and keypoint estimation tasks.
message ObjectCenterParams {
// Weight for the object center loss.
optional float object_center_loss_weight = 1 [default = 1.0];
// Classification loss configuration for object center loss.
optional ClassificationLoss classification_loss = 2;
// The initial bias value of the convolution kernel of the class heatmap
// prediction head. -2.19 corresponds to predicting foreground with
// a probability of 0.1. See "Focal Loss for Dense Object Detection"
// at https://arxiv.org/abs/1708.02002.
optional float heatmap_bias_init = 3 [default = -2.19];
// The minimum IOU overlap boxes need to have to not be penalized.
optional float min_box_overlap_iou = 4 [default = 0.7];
// Maximum number of boxes to predict.
optional int32 max_box_predictions = 5 [default = 100];
// If set, loss is only computed for the labeled classes.
optional bool use_labeled_classes = 6 [default = false];
}
optional ObjectCenterParams object_center_params = 5;
// Path of the file that contains the label map along with the keypoint
// information, including the keypoint indices, corresponding labels, and the
// corresponding class. The file should be the same one as used in the input
// pipeline. Note that a plain text of StringIntLabelMap proto is expected in
// this file.
// It is required only if the keypoint estimation task is specified.
optional string keypoint_label_map_path = 6;
// Parameters which are related to the keypoint estimation task.
message KeypointEstimation {
// Name of the task, e.g. "human pose". Note that the task name should be
// unique to each keypoint task.
optional string task_name = 1;
// Weight of the task loss. The total loss of the model will be the
// summation of task losses weighted by the weights.
optional float task_loss_weight = 2 [default = 1.0];
// Loss configuration for keypoint heatmap, offset, regression losses. Note
// that the localization loss is used for offset/regression losses and
// classification loss is used for heatmap loss.
optional Loss loss = 3;
// The name of the class that contains the keypoints for this task. This is
// used to retrieve the corresponding keypoint indices from the label map.
// Note that this corresponds to the "name" field, not "display_name".
optional string keypoint_class_name = 4;
// The standard deviation of the Gaussian kernel used to generate the
// keypoint heatmap. The unit is the pixel in the output image. It is to
// provide the flexibility of using different sizes of Gaussian kernel for
// each keypoint class. Note that if provided, the keypoint standard
// deviations will be overridden by the specified values here, otherwise,
// the default value 5.0 will be used.
// TODO(yuhuic): Update the default value once we found the best value.
map<string, float> keypoint_label_to_std = 5;
// Loss weights corresponding to different heads.
optional float keypoint_regression_loss_weight = 6 [default = 1.0];
optional float keypoint_heatmap_loss_weight = 7 [default = 1.0];
optional float keypoint_offset_loss_weight = 8 [default = 1.0];
// The initial bias value of the convolution kernel of the keypoint heatmap
// prediction head. -2.19 corresponds to predicting foreground with
// a probability of 0.1. See "Focal Loss for Dense Object Detection"
// at https://arxiv.org/abs/1708.02002.
optional float heatmap_bias_init = 9 [default = -2.19];
// The heatmap score threshold for a keypoint to become a valid candidate.
optional float keypoint_candidate_score_threshold = 10 [default = 0.1];
// The maximum number of candidates to retrieve for each keypoint.
optional int32 num_candidates_per_keypoint = 11 [default = 100];
// Max pool kernel size to use to pull off peak score locations in a
// neighborhood (independently for each keypoint type).
optional int32 peak_max_pool_kernel_size = 12 [default = 3];
// The default score to use for regressed keypoints that are not
// successfully snapped to a nearby candidate.
optional float unmatched_keypoint_score = 13 [default = 0.1];
// The multiplier to expand the bounding boxes (either the provided boxes or
// those which tightly cover the regressed keypoints). Note that the new
// expanded box for an instance becomes the feasible search window for all
// associated keypoints.
optional float box_scale = 14 [default = 1.2];
// The scale parameter that multiplies the largest dimension of a bounding
// box. The resulting distance becomes a search radius for candidates in the
// vicinity of each regressed keypoint.
optional float candidate_search_scale = 15 [default = 0.3];
// One of ['min_distance', 'score_distance_ratio'] indicating how to select
// the keypoint candidate.
optional string candidate_ranking_mode = 16 [default = "min_distance"];
// The radius (in the unit of output pixels) around the heatmap peak to
// assign the offset targets. If set to 0, then the offset target will only
// be assigned to the heatmap peak (same behavior as the original paper).
optional int32 offset_peak_radius = 17 [default = 0];
// Indicates whether to assign offsets for each keypoint channel
// separately. If set to False, the output offset target has the shape
// [batch_size, out_height, out_width, 2] (same behavior as the original
// paper). If set to True, the output offset target has the shape
// [batch_size, out_height, out_width, 2 * num_keypoints] (recommended when
// the offset_peak_radius is not zero).
optional bool per_keypoint_offset = 18 [default = false];
}
repeated KeypointEstimation keypoint_estimation_task = 7;
// Parameters which are related to the mask estimation task.
// Note: Currently, CenterNet supports a weak instance segmentation, where
// semantic segmentation masks are estimated, and then cropped based on
// bounding box detections. Therefore, it is possible for the same image
// pixel to be assigned to multiple instances.
message MaskEstimation {
// Weight of the task loss. The total loss of the model will be the
// summation of task losses weighted by the weights.
optional float task_loss_weight = 1 [default = 1.0];
// Classification loss configuration for segmentation loss.
optional ClassificationLoss classification_loss = 2;
// Each instance mask (one per detection) is cropped and resized (bilinear
// resampling) from the predicted segmentation feature map. After
// resampling, the masks are binarized with the provided score threshold.
optional int32 mask_height = 4 [default = 256];
optional int32 mask_width = 5 [default = 256];
optional float score_threshold = 6 [default = 0.5];
// The initial bias value of the convolution kernel of the class heatmap
// prediction head. -2.19 corresponds to predicting foreground with
// a probability of 0.1. See "Focal Loss for Dense Object Detection"
// at https://arxiv.org/abs/1708.02002.
// NOTE(review): field number 3 is declared after 4-6; this is wire-safe
// but do not renumber it to "tidy up" the order.
optional float heatmap_bias_init = 3 [default = -2.19];
}
optional MaskEstimation mask_estimation_task = 8;
}
// Configuration of the feature extractor (backbone) used by the CenterNet
// meta architecture (see the feature_extractor field of CenterNet).
message CenterNetFeatureExtractor {
// Name of the feature extractor type.
// NOTE(review): presumably resolved against a registry of feature
// extractors in the model builder — verify against the builder code.
optional string type = 1;
// Channel means to be subtracted from each image channel. If not specified,
// we use a default value of 0.
repeated float channel_means = 2;
// Channel standard deviations. Each channel will be normalized by dividing
// it by its standard deviation. If not specified, we use a default value
// of 1.
repeated float channel_stds = 3;
// If set, will change channel order to be [blue, green, red]. This can be
// useful to be compatible with some pre-trained feature extractors.
optional bool bgr_ordering = 4 [default = false];
}