syntax = "proto2";
package object_detection.protos;
import "object_detection/protos/image_resizer.proto";
import "object_detection/protos/losses.proto";
// Configuration for the CenterNet meta architecture from the "Objects as
// Points" paper [1].
// [1]: https://arxiv.org/abs/1904.07850
message CenterNet {
  // Number of classes to predict.
  optional int32 num_classes = 1;
  // Feature extractor config.
  optional CenterNetFeatureExtractor feature_extractor = 2;
  // Image resizer for preprocessing the input image.
  optional ImageResizer image_resizer = 3;
  // Parameters which are related to the object detection task.
  message ObjectDetection {
    // The original fields are moved to ObjectCenterParams or deleted.
    reserved 2, 5, 6, 7;
    // Weight of the task loss. The total loss of the model will be the
    // summation of task losses weighted by the weights.
    optional float task_loss_weight = 1 [default = 1.0];
    // Weight for the offset localization loss.
    optional float offset_loss_weight = 3 [default = 1.0];
    // Weight for the height/width localization loss.
    optional float scale_loss_weight = 4 [default = 0.1];
    // Localization loss configuration for object scale and offset losses.
    optional LocalizationLoss localization_loss = 8;
  }
  optional ObjectDetection object_detection_task = 4;
  // Parameters related to object center prediction. This is required for both
  // object detection and keypoint estimation tasks.
  message ObjectCenterParams {
    // Weight for the object center loss.
    optional float object_center_loss_weight = 1 [default = 1.0];
    // Classification loss configuration for object center loss.
    optional ClassificationLoss classification_loss = 2;
    // The initial bias value of the convolution kernel of the class heatmap
    // prediction head. -2.19 corresponds to predicting foreground with
    // a probability of 0.1. See "Focal Loss for Dense Object Detection"
    // at https://arxiv.org/abs/1708.02002.
    optional float heatmap_bias_init = 3 [default = -2.19];
    // The minimum IOU overlap boxes need to have to not be penalized.
    optional float min_box_overlap_iou = 4 [default = 0.7];
    // Maximum number of boxes to predict.
    optional int32 max_box_predictions = 5 [default = 100];
    // If set, loss is only computed for the labeled classes.
    optional bool use_labeled_classes = 6 [default = false];
  }
  optional ObjectCenterParams object_center_params = 5;
  // Path of the file that contains the label map along with the keypoint
  // information, including the keypoint indices, corresponding labels, and the
  // corresponding class. The file should be the same one as used in the input
  // pipeline. Note that a plain text of StringIntLabelMap proto is expected in
  // this file.
  // It is required only if the keypoint estimation task is specified.
  optional string keypoint_label_map_path = 6;
  // Parameters which are related to the keypoint estimation task.
  message KeypointEstimation {
    // Name of the task, e.g. "human pose". Note that the task name should be
    // unique to each keypoint task.
    optional string task_name = 1;
    // Weight of the task loss. The total loss of the model will be the
    // summation of task losses weighted by the weights.
    optional float task_loss_weight = 2 [default = 1.0];
    // Loss configuration for keypoint heatmap, offset, regression losses. Note
    // that the localization loss is used for offset/regression losses and
    // classification loss is used for heatmap loss.
    optional Loss loss = 3;
    // The name of the class that contains the keypoints for this task. This is
    // used to retrieve the corresponding keypoint indices from the label map.
    // Note that this corresponds to the "name" field, not "display_name".
    optional string keypoint_class_name = 4;
    // The standard deviation of the Gaussian kernel used to generate the
    // keypoint heatmap. The unit is the pixel in the output image. It is to
    // provide the flexibility of using different sizes of Gaussian kernel for
    // each keypoint class. Note that if provided, the keypoint standard
    // deviations will be overridden by the specified values here, otherwise,
    // the default value 5.0 will be used.
    // TODO(yuhuic): Update the default value once we found the best value.
    map<string, float> keypoint_label_to_std = 5;
    // Loss weights corresponding to different heads.
    optional float keypoint_regression_loss_weight = 6 [default = 1.0];
    optional float keypoint_heatmap_loss_weight = 7 [default = 1.0];
    optional float keypoint_offset_loss_weight = 8 [default = 1.0];
    // The initial bias value of the convolution kernel of the keypoint heatmap
    // prediction head. -2.19 corresponds to predicting foreground with
    // a probability of 0.1. See "Focal Loss for Dense Object Detection"
    // at https://arxiv.org/abs/1708.02002.
    optional float heatmap_bias_init = 9 [default = -2.19];
    // The heatmap score threshold for a keypoint to become a valid candidate.
    optional float keypoint_candidate_score_threshold = 10 [default = 0.1];
    // The maximum number of candidates to retrieve for each keypoint.
    optional int32 num_candidates_per_keypoint = 11 [default = 100];
    // Max pool kernel size to use to pull off peak score locations in a
    // neighborhood (independently for each keypoint type).
    optional int32 peak_max_pool_kernel_size = 12 [default = 3];
    // The default score to use for regressed keypoints that are not
    // successfully snapped to a nearby candidate.
    optional float unmatched_keypoint_score = 13 [default = 0.1];
    // The multiplier to expand the bounding boxes (either the provided boxes or
    // those which tightly cover the regressed keypoints). Note that the new
    // expanded box for an instance becomes the feasible search window for all
    // associated keypoints.
    optional float box_scale = 14 [default = 1.2];
    // The scale parameter that multiplies the largest dimension of a bounding
    // box. The resulting distance becomes a search radius for candidates in the
    // vicinity of each regressed keypoint.
    optional float candidate_search_scale = 15 [default = 0.3];
    // One of ['min_distance', 'score_distance_ratio'] indicating how to select
    // the keypoint candidate.
    optional string candidate_ranking_mode = 16 [default = "min_distance"];
    // The radius (in the unit of output pixel) around heatmap peak to assign
    // the offset targets. If set to 0, then the offset target will only be
    // assigned to the heatmap peak (same behavior as the original paper).
    optional int32 offset_peak_radius = 17 [default = 0];
    // Indicates whether to assign offsets for each keypoint channel
    // separately. If set to false, the output offset target has the shape
    // [batch_size, out_height, out_width, 2] (same behavior as the original
    // paper). If set to true, the output offset target has the shape
    // [batch_size, out_height, out_width, 2 * num_keypoints] (recommended when
    // the offset_peak_radius is not zero).
    optional bool per_keypoint_offset = 18 [default = false];
  }
  repeated KeypointEstimation keypoint_estimation_task = 7;
  // Parameters which are related to the mask estimation task.
  // Note: Currently, CenterNet supports a weak instance segmentation, where
  // semantic segmentation masks are estimated, and then cropped based on
  // bounding box detections. Therefore, it is possible for the same image
  // pixel to be assigned to multiple instances.
  message MaskEstimation {
    // Weight of the task loss. The total loss of the model will be the
    // summation of task losses weighted by the weights.
    optional float task_loss_weight = 1 [default = 1.0];
    // Classification loss configuration for segmentation loss.
    optional ClassificationLoss classification_loss = 2;
    // Each instance mask (one per detection) is cropped and resized (bilinear
    // resampling) from the predicted segmentation feature map. After
    // resampling, the masks are binarized with the provided score threshold.
    optional int32 mask_height = 4 [default = 256];
    optional int32 mask_width = 5 [default = 256];
    optional float score_threshold = 6 [default = 0.5];
    // The initial bias value of the convolution kernel of the class heatmap
    // prediction head. -2.19 corresponds to predicting foreground with
    // a probability of 0.1.
    optional float heatmap_bias_init = 3 [default = -2.19];
  }
  optional MaskEstimation mask_estimation_task = 8;
}
// Configuration of the feature extractor used by the CenterNet model, along
// with the input-image normalization it expects.
message CenterNetFeatureExtractor {
  // Name identifying which feature extractor implementation to use.
  // NOTE(review): the set of valid names is resolved by the model builder,
  // which is not visible in this file — consult the builder for valid values.
  optional string type = 1;
  // Channel means to be subtracted from each image channel. If not specified,
  // we use a default value of 0.
  repeated float channel_means = 2;
  // Channel standard deviations. Each channel will be normalized by dividing
  // it by its standard deviation. If not specified, we use a default value
  // of 1.
  repeated float channel_stds = 3;
  // If set, will change channel order to be [blue, green, red]. This can be
  // useful to be compatible with some pre-trained feature extractors.
  optional bool bgr_ordering = 4 [default = false];
}