""" | |
Mask R-CNN | |
Base Configurations class. | |
Copyright (c) 2017 Matterport, Inc. | |
Licensed under the MIT License (see LICENSE for details) | |
Written by Waleed Abdulla | |
""" | |
import numpy as np | |


# Base Configuration Class
# Don't use this class directly. Instead, sub-class it and override
# the configurations you need to change.

class Config(object):
    """Base configuration class. For custom configurations, create a
    sub-class that inherits from this one and override properties
    that need to be changed.
    """
    # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
    # Useful if your code needs to do things differently depending on which
    # experiment is running.
    NAME = None  # Override in sub-classes

    # Number of GPUs to use. When using only a CPU, this needs to be set to 1.
    GPU_COUNT = 1

    # Number of images to train with on each GPU. A 12GB GPU can typically
    # handle 2 images of 1024x1024px.
    # Adjust based on your GPU memory and image sizes. Use the highest
    # number that your GPU can handle for best performance.
    IMAGES_PER_GPU = 2

    # Number of training steps per epoch
    # This doesn't need to match the size of the training set. TensorBoard
    # updates are saved at the end of each epoch, so setting this to a
    # smaller number means getting more frequent TensorBoard updates.
    # Validation stats are also calculated at each epoch end and they
    # might take a while, so don't set this so small that a large share
    # of training time goes to computing validation stats.
    STEPS_PER_EPOCH = 1000

    # Number of validation steps to run at the end of every training epoch.
    # A bigger number improves accuracy of validation stats, but slows
    # down the training.
    VALIDATION_STEPS = 50

    # Backbone network architecture
    # Supported values are: resnet50, resnet101.
    # You can also provide a callable that should have the signature
    # of model.resnet_graph. If you do so, you need to supply a callable
    # to COMPUTE_BACKBONE_SHAPE as well.
    BACKBONE = "resnet101"

    # Only useful if you supply a callable to BACKBONE. Should compute
    # the shape of each layer of the FPN Pyramid.
    # See model.compute_backbone_shapes
    COMPUTE_BACKBONE_SHAPE = None

    # The strides of each layer of the FPN Pyramid. These values
    # are based on a Resnet101 backbone.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64]

    # Size of the fully-connected layers in the classification graph
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024

    # Size of the top-down layers used to build the feature pyramid
    TOP_DOWN_PYRAMID_SIZE = 256

    # Number of classification classes (including background)
    NUM_CLASSES = 1  # Override in sub-classes

    # Length of square anchor side in pixels
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Ratios of anchors at each cell (width/height)
    # A value of 1 represents a square anchor, 0.5 a tall (portrait)
    # anchor, and 2 a wide (landscape) anchor.
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]

    # Anchor stride
    # If 1 then anchors are created for each cell in the backbone feature map.
    # If 2, then anchors are created for every other cell, and so on.
    RPN_ANCHOR_STRIDE = 1

    # Non-max suppression threshold to filter RPN proposals.
    # You can increase this during training to generate more proposals.
    RPN_NMS_THRESHOLD = 0.7

    # How many anchors per image to use for RPN training
    RPN_TRAIN_ANCHORS_PER_IMAGE = 256

    # ROIs kept after tf.nn.top_k and before non-maximum suppression
    PRE_NMS_LIMIT = 6000

    # ROIs kept after non-maximum suppression (training and inference)
    POST_NMS_ROIS_TRAINING = 2000
    POST_NMS_ROIS_INFERENCE = 1000

    # If enabled, resizes instance masks to a smaller size to reduce
    # memory load. Recommended when using high-resolution images.
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (56, 56)  # (height, width) of the mini-mask

    # Input image resizing
    # Generally, use the "square" resizing mode for training and predicting
    # and it should work well in most cases. In this mode, images are scaled
    # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
    # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
    # padded with zeros to make it a square so multiple images can be put
    # in one batch.
    # Available resizing modes:
    # none:   No resizing or padding. Return the image unchanged.
    # square: Resize and pad with zeros to get a square image
    #         of size [max_dim, max_dim].
    # pad64:  Pads width and height with zeros to make them multiples of 64.
    #         If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales
    #         up before padding. IMAGE_MAX_DIM is ignored in this mode.
    #         The multiple of 64 is needed to ensure smooth scaling of feature
    #         maps up and down the 6 levels of the FPN pyramid (2**6=64).
    # crop:   Picks random crops from the image. First, scales the image based
    #         on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop
    #         of size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training
    #         only. IMAGE_MAX_DIM is not used in this mode.
    IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 800
    IMAGE_MAX_DIM = 1024
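    # A worked example (illustrative, not from the original comments): in
    # "square" mode with the defaults above, an 800x600 image is scaled by
    # 1024/800 = 1.28 to 1024x768, since the long side hits IMAGE_MAX_DIM
    # before the short side reaches IMAGE_MIN_DIM, and is then zero-padded
    # to 1024x1024.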

    # Minimum scaling ratio. Checked after IMAGE_MIN_DIM and can force further
    # up scaling. For example, if set to 2 then images are scaled up to double
    # the width and height, or more, even if IMAGE_MIN_DIM doesn't require it.
    # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM.
    IMAGE_MIN_SCALE = 0

    # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4
    # Changing this requires other changes in the code. See the WIKI for more
    # details: https://github.com/matterport/Mask_RCNN/wiki
    IMAGE_CHANNEL_COUNT = 3

    # Image mean (RGB)
    MEAN_PIXEL = np.array([123.7, 116.8, 103.9])

    # Number of ROIs per image to feed to classifier/mask heads
    # The Mask RCNN paper uses 512 but often the RPN doesn't generate
    # enough positive proposals to fill this and keep a positive:negative
    # ratio of 1:3. You can increase the number of proposals by adjusting
    # the RPN NMS threshold.
    TRAIN_ROIS_PER_IMAGE = 200

    # Percent of positive ROIs used to train classifier/mask heads
    ROI_POSITIVE_RATIO = 0.33
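    # For instance, with the defaults above, up to 0.33 * 200 = 66 ROIs
    # per image are positive and the remainder negative.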

    # Pooled ROI size, in feature-map cells, for the classifier head
    # (POOL_SIZE) and the mask head (MASK_POOL_SIZE).
    POOL_SIZE = 7
    MASK_POOL_SIZE = 14

    # Shape of output mask
    # To change this you also need to change the neural network mask branch
    MASK_SHAPE = [28, 28]

    # Maximum number of ground truth instances to use in one image
    MAX_GT_INSTANCES = 100

    # Bounding box refinement standard deviation for RPN and final detections.
    RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
    BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])

    # Max number of final detections
    DETECTION_MAX_INSTANCES = 100

    # Minimum probability value to accept a detected instance
    # ROIs below this threshold are skipped
    DETECTION_MIN_CONFIDENCE = 0.7

    # Non-maximum suppression threshold for detection
    DETECTION_NMS_THRESHOLD = 0.3

    # Learning rate and momentum
    # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
    # weights to explode. Likely due to differences in optimizer
    # implementation.
    LEARNING_RATE = 0.001
    LEARNING_MOMENTUM = 0.9

    # Weight decay regularization
    WEIGHT_DECAY = 0.0001

    # Loss weights for more precise optimization.
    # Can be used to balance the losses in the R-CNN training setup.
    LOSS_WEIGHTS = {
        "rpn_class_loss": 1.0,
        "rpn_bbox_loss": 1.0,
        "mrcnn_class_loss": 1.0,
        "mrcnn_bbox_loss": 1.0,
        "mrcnn_mask_loss": 1.0,
    }

    # Use RPN ROIs or externally generated ROIs for training
    # Keep this True for most situations. Set to False if you want to train
    # the head branches on ROIs generated by code rather than the ROIs from
    # the RPN. For example, to debug the classifier head without having to
    # train the RPN.
    USE_RPN_ROIS = True

    # Train or freeze batch normalization layers
    # None:  Train BN layers. This is the normal mode
    # False: Freeze BN layers. Good when using a small batch size
    # True:  (don't use). Set layer in training mode even when predicting
    TRAIN_BN = False  # Defaulting to False since batch size is often small

    # Gradient norm clipping
    GRADIENT_CLIP_NORM = 5.0

    def __init__(self):
        """Set values of computed attributes."""
        # Effective batch size
        self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
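        # e.g. the defaults give 2 images/GPU * 1 GPU = a batch size of 2.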

        # Input image size
        if self.IMAGE_RESIZE_MODE == "crop":
            self.IMAGE_SHAPE = np.array(
                [self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, self.IMAGE_CHANNEL_COUNT]
            )
        else:
            self.IMAGE_SHAPE = np.array(
                [self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, self.IMAGE_CHANNEL_COUNT]
            )

        # Image meta data length
        # See compose_image_meta() for details
        self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES
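        # The terms above, in order: 1 image_id + 3 original_image_shape +
        # 3 image_shape + 4 window (y1, x1, y2, x2) + 1 scale +
        # NUM_CLASSES active class IDs.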

    def display(self):
        """Display Configuration values."""
        print("\nConfigurations:")
        for a in dir(self):
            if not a.startswith("__") and not callable(getattr(self, a)):
                print("{:30} {}".format(a, getattr(self, a)))
        print("\n")