diff --git a/assets/main_fig.png b/assets/main_fig.png new file mode 100644 index 0000000000000000000000000000000000000000..077a94b4988bd4a86e94da94b4c31f80de312398 Binary files /dev/null and b/assets/main_fig.png differ diff --git a/configs/ground-truth-warmup/Base-COCO-PanopticSegmentation.yaml b/configs/ground-truth-warmup/Base-COCO-PanopticSegmentation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0cad56bb9d119f7377133a57736e2ccb1b2ff890 --- /dev/null +++ b/configs/ground-truth-warmup/Base-COCO-PanopticSegmentation.yaml @@ -0,0 +1,60 @@ +MODEL: + BACKBONE: + FREEZE_AT: 0 + NAME: "build_resnet_backbone" + WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" + PIXEL_MEAN: [123.675, 116.280, 103.530] + PIXEL_STD: [58.395, 57.120, 57.375] + RESNETS: + DEPTH: 50 + STEM_TYPE: "basic" # not used + STEM_OUT_CHANNELS: 64 + STRIDE_IN_1X1: False + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + # NORM: "SyncBN" + RES5_MULTI_GRID: [1, 1, 1] # not used + +SOLVER: + IMS_PER_BATCH: 8 + BASE_LR: 0.0001 + STEPS: (260231, 283888) + MAX_ITER: 295717 + WARMUP_FACTOR: 1.0 + WARMUP_ITERS: 10 + CHECKPOINT_PERIOD: 10000 + WEIGHT_DECAY: 0.05 + OPTIMIZER: "ADAMW" + BACKBONE_MULTIPLIER: 0.1 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "full_model" + CLIP_VALUE: 1.0 + NORM_TYPE: 2.0 + AMP: + ENABLED: True +INPUT: + IMAGE_SIZE: 768 + MIN_SCALE: 0.1 + MAX_SCALE: 2.0 + FORMAT: "RGB" + MIN_SIZE_TRAIN: (1024,) + MAX_SIZE_TRAIN: 1024 + DATASET_MAPPER_NAME: "coco_combine_lsj" + MASK_FORMAT: "bitmask" + COLOR_AUG_SSD: True + +DATASETS: + TRAIN: ("openvocab_coco_2017_train_panoptic_with_sem_seg",) + TEST: ("openvocab_ade20k_panoptic_val",) # to evaluate instance and semantic performance as well +DATALOADER: + SAMPLER_TRAIN: "MultiDatasetSampler" + USE_DIFF_BS_SIZE: False + DATASET_RATIO: [1.0] + DATASET_BS: [2] + USE_RFS: [False] + NUM_WORKERS: 8 + DATASET_ANN: ['mask'] + ASPECT_RATIO_GROUPING: True +TEST: + EVAL_PERIOD: 10000 +VERSION: 2 diff --git a/configs/ground-truth-warmup/mask-adapter/mask_adapter_convnext_large_cocopan_eval_ade20k.yaml b/configs/ground-truth-warmup/mask-adapter/mask_adapter_convnext_large_cocopan_eval_ade20k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..964f88abe5542ec02c42271bf44f7a4b118bab3a --- /dev/null +++ b/configs/ground-truth-warmup/mask-adapter/mask_adapter_convnext_large_cocopan_eval_ade20k.yaml @@ -0,0 +1,40 @@ +_BASE_: ../maskformer2_R50_bs16_50ep.yaml +MODEL: + META_ARCHITECTURE: "MASK_Adapter" + MASK_ADAPTER: + NAME: "MASKAdapterHead" + MASK_IN_CHANNELS: 16 + NUM_CHANNELS: 768 + USE_CHECKPOINT: False + NUM_OUTPUT_MAPS: 16 + # backbone part. 
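+  # CLIP ConvNeXt-L backbone from OpenCLIP; the exact model/pretrained tags are set under FC_CLIP below.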
+ BACKBONE: + NAME: "CLIP" + WEIGHTS: "" + PIXEL_MEAN: [122.7709383, 116.7460125, 104.09373615] + PIXEL_STD: [68.5005327, 66.6321579, 70.32316305] + FC_CLIP: + CLIP_MODEL_NAME: "convnext_large_d_320" + CLIP_PRETRAINED_WEIGHTS: "laion2b_s29b_b131k_ft_soup" + EMBED_DIM: 768 + GEOMETRIC_ENSEMBLE_ALPHA: -1.0 + GEOMETRIC_ENSEMBLE_BETA: -1.0 + MASK_FORMER: + NUM_OBJECT_QUERIES: 250 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OVERLAP_THRESHOLD: 0.8 + OBJECT_MASK_THRESHOLD: 0.0 + +INPUT: + DATASET_MAPPER_NAME: "coco_panoptic_lsj" + +DATALOADER: + SAMPLER_TRAIN: "TrainingSampler" + +DATASETS: + TRAIN: ("openvocab_coco_2017_train_panoptic_with_sem_seg",) + TEST: ("openvocab_ade20k_panoptic_val",) +OUTPUT_DIR: ./training/first-phase/fcclip-l-adapter diff --git a/configs/ground-truth-warmup/mask-adapter/mask_adapter_maft_convnext_base_cocostuff_eval_ade20k.yaml b/configs/ground-truth-warmup/mask-adapter/mask_adapter_maft_convnext_base_cocostuff_eval_ade20k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7dc36d52d84eda8be17b86b30fbfcb2fe3cb5f96 --- /dev/null +++ b/configs/ground-truth-warmup/mask-adapter/mask_adapter_maft_convnext_base_cocostuff_eval_ade20k.yaml @@ -0,0 +1,40 @@ +_BASE_: ../maskformer2_R50_bs16_50ep.yaml +MODEL: + META_ARCHITECTURE: "MASK_Adapter" + MASK_ADAPTER: + NAME: "MASKAdapterHead" + MASK_IN_CHANNELS: 16 + NUM_CHANNELS: 768 + USE_CHECKPOINT: False + NUM_OUTPUT_MAPS: 16 + TRAIN_MAFT: True + # backbone part. + BACKBONE: + NAME: "CLIP" + WEIGHTS: "" + PIXEL_MEAN: [122.7709383, 116.7460125, 104.09373615] + PIXEL_STD: [68.5005327, 66.6321579, 70.32316305] + FC_CLIP: + CLIP_MODEL_NAME: "convnext_base_w_320" + CLIP_PRETRAINED_WEIGHTS: "laion_aesthetic_s13b_b82k_augreg" + EMBED_DIM: 640 + GEOMETRIC_ENSEMBLE_ALPHA: -1.0 + GEOMETRIC_ENSEMBLE_BETA: -1.0 + MASK_FORMER: + NUM_OBJECT_QUERIES: 250 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OVERLAP_THRESHOLD: 0.8 + OBJECT_MASK_THRESHOLD: 0.0 + +INPUT: + DATASET_MAPPER_NAME: "mask_former_semantic" + +DATASETS: + TRAIN: ("openvocab_coco_2017_train_stuff_sem_seg",) + TEST: ("openvocab_ade20k_panoptic_val",) +DATALOADER: + SAMPLER_TRAIN: "TrainingSampler" +OUTPUT_DIR: ./training/first-phase/maft_b_adapter diff --git a/configs/ground-truth-warmup/mask-adapter/mask_adapter_maft_convnext_large_cocostuff_eval_ade20k.yaml b/configs/ground-truth-warmup/mask-adapter/mask_adapter_maft_convnext_large_cocostuff_eval_ade20k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f8dd143851bb5a9d2ba9c71d0b36440380cd7cb --- /dev/null +++ b/configs/ground-truth-warmup/mask-adapter/mask_adapter_maft_convnext_large_cocostuff_eval_ade20k.yaml @@ -0,0 +1,40 @@ +_BASE_: ../maskformer2_R50_bs16_50ep.yaml +MODEL: + META_ARCHITECTURE: "MASK_Adapter" + MASK_ADAPTER: + NAME: "MASKAdapterHead" + MASK_IN_CHANNELS: 16 + NUM_CHANNELS: 768 + USE_CHECKPOINT: False + NUM_OUTPUT_MAPS: 16 + TRAIN_MAFT: True + # backbone part. 
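+  # Same CLIP ConvNeXt-L backbone; TRAIN_MAFT above switches the warmup to the MAFT-Plus variant.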
+ BACKBONE: + NAME: "CLIP" + WEIGHTS: "" + PIXEL_MEAN: [122.7709383, 116.7460125, 104.09373615] + PIXEL_STD: [68.5005327, 66.6321579, 70.32316305] + FC_CLIP: + CLIP_MODEL_NAME: "convnext_large_d_320" + CLIP_PRETRAINED_WEIGHTS: "laion2b_s29b_b131k_ft_soup" + EMBED_DIM: 768 + GEOMETRIC_ENSEMBLE_ALPHA: -1.0 + GEOMETRIC_ENSEMBLE_BETA: -1.0 + MASK_FORMER: + NUM_OBJECT_QUERIES: 250 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OVERLAP_THRESHOLD: 0.8 + OBJECT_MASK_THRESHOLD: 0.0 + +INPUT: + DATASET_MAPPER_NAME: "mask_former_semantic" + +DATASETS: + TRAIN: ("openvocab_coco_2017_train_stuff_sem_seg",) + TEST: ("openvocab_ade20k_panoptic_val",) +DATALOADER: + SAMPLER_TRAIN: "TrainingSampler" +OUTPUT_DIR: ./training/first-phase/maft_l_adapter diff --git a/configs/ground-truth-warmup/maskformer2_R50_bs16_50ep.yaml b/configs/ground-truth-warmup/maskformer2_R50_bs16_50ep.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce7b5251c23441d93b709a06101b260edeac58cc --- /dev/null +++ b/configs/ground-truth-warmup/maskformer2_R50_bs16_50ep.yaml @@ -0,0 +1,45 @@ +_BASE_: Base-COCO-PanopticSegmentation.yaml +MODEL: + META_ARCHITECTURE: "MaskFormer" + SEM_SEG_HEAD: + NAME: "FCCLIPMASKHead" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + IGNORE_VALUE: 255 + NUM_CLASSES: 133 + LOSS_WEIGHT: 1.0 + CONVS_DIM: 256 + MASK_DIM: 256 + NORM: "GN" + # pixel decoder + PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] + COMMON_STRIDE: 4 + TRANSFORMER_ENC_LAYERS: 6 + MASK_FORMER: + TRANSFORMER_DECODER_NAME: "MultiScaleMaskedTransformerDecoder" + TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" + DEEP_SUPERVISION: True + NO_OBJECT_WEIGHT: 0.1 + CLASS_WEIGHT: 2.0 + MASK_WEIGHT: 5.0 + DICE_WEIGHT: 5.0 + HIDDEN_DIM: 256 + NUM_OBJECT_QUERIES: 100 + NHEADS: 8 + DROPOUT: 0.0 + DIM_FEEDFORWARD: 2048 + ENC_LAYERS: 0 + PRE_NORM: False + ENFORCE_INPUT_PROJ: False + SIZE_DIVISIBILITY: 32 + DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query + TRAIN_NUM_POINTS: 12544 + OVERSAMPLE_RATIO: 3.0 + IMPORTANCE_SAMPLE_RATIO: 0.75 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OVERLAP_THRESHOLD: 0.8 + OBJECT_MASK_THRESHOLD: 0.8 diff --git a/configs/mixed-mask-training/fc-clip/Base-COCO-PanopticSegmentation.yaml b/configs/mixed-mask-training/fc-clip/Base-COCO-PanopticSegmentation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ce8f18dfc84ad205607ae46348175eda40996a1 --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/Base-COCO-PanopticSegmentation.yaml @@ -0,0 +1,49 @@ +MODEL: + BACKBONE: + FREEZE_AT: 0 + NAME: "build_resnet_backbone" + WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" + PIXEL_MEAN: [123.675, 116.280, 103.530] + PIXEL_STD: [58.395, 57.120, 57.375] + RESNETS: + DEPTH: 50 + STEM_TYPE: "basic" # not used + STEM_OUT_CHANNELS: 64 + STRIDE_IN_1X1: False + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + # NORM: "SyncBN" + RES5_MULTI_GRID: [1, 1, 1] # not used +DATASETS: + TRAIN: ("openvocab_coco_2017_train_stuff_sem_seg",) + TEST: ("openvocab_ade20k_panoptic_val",) # to evaluate instance and semantic performance as well +SOLVER: + IMS_PER_BATCH: 18 + BASE_LR: 0.0001 + STEPS: (216859, 236574) + MAX_ITER: 246431 + WARMUP_FACTOR: 1.0 + WARMUP_ITERS: 10 + WEIGHT_DECAY: 0.05 + OPTIMIZER: "ADAMW" + BACKBONE_MULTIPLIER: 0.1 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "full_model" + 
CLIP_VALUE: 1.0 + NORM_TYPE: 2.0 + AMP: + ENABLED: True +INPUT: + IMAGE_SIZE: 1024 + MIN_SCALE: 0.1 + MAX_SCALE: 2.0 + MIN_SIZE_TEST: 896 + MAX_SIZE_TEST: 896 + FORMAT: "RGB" + DATASET_MAPPER_NAME: "coco_panoptic_lsj" +TEST: + EVAL_PERIOD: 5000 +DATALOADER: + FILTER_EMPTY_ANNOTATIONS: True + NUM_WORKERS: 4 +VERSION: 2 diff --git a/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_a847.yaml b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_a847.yaml new file mode 100644 index 0000000000000000000000000000000000000000..752df354115a333886ffd8ec4c1e6f8a0f06f411 --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_a847.yaml @@ -0,0 +1,12 @@ +_BASE_: ./fcclip_convnext_large_eval_ade20k.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_ade20k_full_sem_seg_val",) + +OUTPUT_DIR: ./evaluation/fc-clip/a847 \ No newline at end of file diff --git a/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_ade20k.yaml b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_ade20k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7daac1b8d3d2ec3f922e107a416939b585661e38 --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_ade20k.yaml @@ -0,0 +1,55 @@ +_BASE_: ../maskformer2_R50_bs16_50ep.yaml +MODEL: + META_ARCHITECTURE: "FCCLIP" + SEM_SEG_HEAD: + NAME: "FCCLIPHead" + # backbone part. + MASK_ADAPTER: + NAME: "MASKAdapterHead" + MASK_IN_CHANNELS: 16 + NUM_CHANNELS: 768 + USE_CHECKPOINT: False + NUM_OUTPUT_MAPS: 16 + MASK_THRESHOLD: 0.5 + BACKBONE: + NAME: "CLIP" + WEIGHTS: "" + PIXEL_MEAN: [122.7709383, 116.7460125, 104.09373615] + PIXEL_STD: [68.5005327, 66.6321579, 70.32316305] + FC_CLIP: + CLIP_MODEL_NAME: "convnext_large_d_320" + CLIP_PRETRAINED_WEIGHTS: "laion2b_s29b_b131k_ft_soup" + EMBED_DIM: 768 + GEOMETRIC_ENSEMBLE_ALPHA: 0.7 + GEOMETRIC_ENSEMBLE_BETA: 0.9 + MASK_FORMER: + NUM_OBJECT_QUERIES: 250 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OBJECT_MASK_THRESHOLD: 0.0 + +INPUT: + IMAGE_SIZE: 1024 + MIN_SCALE: 0.1 + MAX_SCALE: 2.0 + COLOR_AUG_SSD: False +SOLVER: + IMS_PER_BATCH: 24 + BASE_LR: 0.0001 + WARMUP_FACTOR: 1.0 + WARMUP_ITERS: 0 + WEIGHT_DECAY: 0.05 + STEPS: (86743, 94629) + MAX_ITER: 98572 + CHECKPOINT_PERIOD: 3300 +TEST: + EVAL_PERIOD: 3300 + +#SEED: 9782623 +DATASETS: + TRAIN: ("openvocab_coco_2017_train_panoptic_with_sem_seg",) + TEST: ("openvocab_ade20k_panoptic_val",) + +OUTPUT_DIR: ./evaluation/fc-clip/ade20k \ No newline at end of file diff --git a/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_coco.yaml b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_coco.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b559cf5bd9fc92c41e8975ac029e91cfba0ef281 --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_coco.yaml @@ -0,0 +1,4 @@ +_BASE_: ./fcclip_convnext_large_eval_ade20k.yaml +DATASETS: + TEST: ("openvocab_coco_2017_val_panoptic_with_sem_seg",) +OUTPUT_DIR: ./coco-test \ No newline at end of file diff --git a/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pas20.yaml b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pas20.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da3f65e00cb76c79e891b137db693ded90b70b4b --- /dev/null +++ 
b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pas20.yaml @@ -0,0 +1,12 @@ +_BASE_: ./fcclip_convnext_large_eval_ade20k.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal20_sem_seg_val",) + +OUTPUT_DIR: ./evaluation/fc-clip/pas20 \ No newline at end of file diff --git a/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pc459.yaml b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pc459.yaml new file mode 100644 index 0000000000000000000000000000000000000000..918e90084974159bf1f879291279e21168ca5abc --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pc459.yaml @@ -0,0 +1,12 @@ +_BASE_: ./fcclip_convnext_large_eval_ade20k.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal_ctx459_sem_seg_val",) + +OUTPUT_DIR: ./evaluation/fc-clip/pc459 \ No newline at end of file diff --git a/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pc59.yaml b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pc59.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44920a151e40335a60ee073715f5d42767ab65a9 --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pc59.yaml @@ -0,0 +1,12 @@ +_BASE_: ./fcclip_convnext_large_eval_ade20k.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal_ctx59_sem_seg_val",) + +OUTPUT_DIR: ./evaluation/fc-clip/pc59 \ No newline at end of file diff --git a/configs/mixed-mask-training/fc-clip/maskformer2_R50_bs16_50ep.yaml b/configs/mixed-mask-training/fc-clip/maskformer2_R50_bs16_50ep.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ebf4f1114fc9ac2dd7a706acf0643559563754c --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/maskformer2_R50_bs16_50ep.yaml @@ -0,0 +1,45 @@ +_BASE_: Base-COCO-PanopticSegmentation.yaml +MODEL: + META_ARCHITECTURE: "MaskFormer" + SEM_SEG_HEAD: + NAME: "MaskFormerHead" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + IGNORE_VALUE: 255 + NUM_CLASSES: 133 + LOSS_WEIGHT: 1.0 + CONVS_DIM: 256 + MASK_DIM: 256 + NORM: "GN" + # pixel decoder + PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] + COMMON_STRIDE: 4 + TRANSFORMER_ENC_LAYERS: 6 + MASK_FORMER: + TRANSFORMER_DECODER_NAME: "MultiScaleMaskedTransformerDecoder" + TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" + DEEP_SUPERVISION: True + NO_OBJECT_WEIGHT: 0.1 + CLASS_WEIGHT: 2.0 + MASK_WEIGHT: 5.0 + DICE_WEIGHT: 5.0 + HIDDEN_DIM: 256 + NUM_OBJECT_QUERIES: 100 + NHEADS: 8 + DROPOUT: 0.0 + DIM_FEEDFORWARD: 2048 + ENC_LAYERS: 0 + PRE_NORM: False + ENFORCE_INPUT_PROJ: False + SIZE_DIVISIBILITY: 32 + DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query + TRAIN_NUM_POINTS: 12544 + OVERSAMPLE_RATIO: 3.0 + IMPORTANCE_SAMPLE_RATIO: 0.75 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OVERLAP_THRESHOLD: 0.8 + OBJECT_MASK_THRESHOLD: 0.8 diff --git a/configs/mixed-mask-training/maftp/Base-COCO-PanopticSegmentation.yaml b/configs/mixed-mask-training/maftp/Base-COCO-PanopticSegmentation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..45e65acc5262605a638f1259c4f325df8a4e2bda --- /dev/null +++ 
b/configs/mixed-mask-training/maftp/Base-COCO-PanopticSegmentation.yaml @@ -0,0 +1,62 @@ +MODEL: + BACKBONE: + FREEZE_AT: 0 + NAME: "CLIP" + # WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" + PIXEL_MEAN: [122.7709383, 116.7460125, 104.09373615] + PIXEL_STD: [68.5005327, 66.6321579, 70.32316305] + RESNETS: + DEPTH: 50 + STEM_TYPE: "basic" # not used + STEM_OUT_CHANNELS: 64 + STRIDE_IN_1X1: False + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + # NORM: "SyncBN" + RES5_MULTI_GRID: [1, 1, 1] # not used +DATASETS: + TRAIN: ("coco_2017_train_panoptic",) + TEST: ("coco_2017_val_panoptic_with_sem_seg",) # to evaluate instance and semantic performance as well +SOLVER: + IMS_PER_BATCH: 8 + BASE_LR: 0.0001 + BIAS_LR_FACTOR: 1.0 + CHECKPOINT_PERIOD: 50000000 + MAX_ITER: 55000 + LR_SCHEDULER_NAME: WarmupPolyLR + MOMENTUM: 0.9 + NESTEROV: false + OPTIMIZER: ADAMW + POLY_LR_CONSTANT_ENDING: 0.0 + POLY_LR_POWER: 0.9 + REFERENCE_WORLD_SIZE: 0 + WARMUP_FACTOR: 1.0 + WARMUP_ITERS: 10 + WARMUP_METHOD: linear + WEIGHT_DECAY: 2.0e-05 + #WEIGHT_DECAY: 0.05 + WEIGHT_DECAY_BIAS: null + WEIGHT_DECAY_EMBED: 0.0 + WEIGHT_DECAY_NORM: 0.0 + STEPS: (327778, 355092) + BACKBONE_MULTIPLIER: 0.1 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "full_model" + CLIP_VALUE: 1.0 + NORM_TYPE: 2.0 + AMP: + ENABLED: True +INPUT: + IMAGE_SIZE: 1024 + MIN_SCALE: 0.1 + MAX_SCALE: 2.0 + MIN_SIZE_TEST: 896 + MAX_SIZE_TEST: 896 + FORMAT: "RGB" + DATASET_MAPPER_NAME: "coco_panoptic_lsj" +TEST: + EVAL_PERIOD: 5000 +DATALOADER: + FILTER_EMPTY_ANNOTATIONS: True + NUM_WORKERS: 8 +VERSION: 2 diff --git a/configs/mixed-mask-training/maftp/maskformer2_R50_bs16_50ep.yaml b/configs/mixed-mask-training/maftp/maskformer2_R50_bs16_50ep.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3d8c8c992aed047c90f31b8a8d039c338a1897e --- /dev/null +++ b/configs/mixed-mask-training/maftp/maskformer2_R50_bs16_50ep.yaml @@ -0,0 +1,45 @@ +_BASE_: Base-COCO-PanopticSegmentation.yaml +MODEL: + META_ARCHITECTURE: "MaskFormer" + SEM_SEG_HEAD: + NAME: "MaskFormerHead" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + IGNORE_VALUE: 255 + NUM_CLASSES: 133 + LOSS_WEIGHT: 1.0 + CONVS_DIM: 256 + MASK_DIM: 256 + NORM: "GN" + # pixel decoder + PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] + COMMON_STRIDE: 4 + TRANSFORMER_ENC_LAYERS: 6 + MASK_FORMER: + TRANSFORMER_DECODER_NAME: "MultiScaleMaskedTransformerDecoder" + TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" + DEEP_SUPERVISION: True + NO_OBJECT_WEIGHT: 0.1 + CLASS_WEIGHT: 2.0 + MASK_WEIGHT: 5.0 + DICE_WEIGHT: 5.0 + HIDDEN_DIM: 256 + NUM_OBJECT_QUERIES: 100 + NHEADS: 8 + DROPOUT: 0.0 + DIM_FEEDFORWARD: 2048 + ENC_LAYERS: 0 + PRE_NORM: False + ENFORCE_INPUT_PROJ: False + SIZE_DIVISIBILITY: 32 + DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query + TRAIN_NUM_POINTS: 12544 + OVERSAMPLE_RATIO: 3.0 + IMPORTANCE_SAMPLE_RATIO: 0.75 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: False + PANOPTIC_ON: False + OBJECT_MASK_THRESHOLD: 0.2 + OVERLAP_THRESHOLD: 0.7 \ No newline at end of file diff --git a/configs/mixed-mask-training/maftp/semantic/eval_a847.yaml b/configs/mixed-mask-training/maftp/semantic/eval_a847.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cddf3b8b5c80d760df70619dd6e2a4fb716b4627 --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/eval_a847.yaml @@ -0,0 +1,13 @@ +_BASE_: ./eval.yaml + +MODEL: + 
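+  # A-847 is evaluated as semantic segmentation only, so panoptic and instance inference are disabled below.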
MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_ade20k_full_sem_seg_val",) + + +OUTPUT_DIR: ./eval/a847 diff --git a/configs/mixed-mask-training/maftp/semantic/eval_pas20.yaml b/configs/mixed-mask-training/maftp/semantic/eval_pas20.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0593f856c3501606590e03863449ebb6da17d414 --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/eval_pas20.yaml @@ -0,0 +1,12 @@ +_BASE_: ./eval.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal20_sem_seg_val",) + +OUTPUT_DIR: ./eval/pas20 diff --git a/configs/mixed-mask-training/maftp/semantic/eval_pas21.yaml b/configs/mixed-mask-training/maftp/semantic/eval_pas21.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2e2afdf8006464f1e04e7a39cae3d135fa7af92 --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/eval_pas21.yaml @@ -0,0 +1,13 @@ +_BASE_: ./eval.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal21_sem_seg_val",) + + +OUTPUT_DIR: ./eval/pas21 \ No newline at end of file diff --git a/configs/mixed-mask-training/maftp/semantic/eval_pc459.yaml b/configs/mixed-mask-training/maftp/semantic/eval_pc459.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f304ff5529877082d4972b0fcd14ff1f837d1bd --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/eval_pc459.yaml @@ -0,0 +1,12 @@ +_BASE_: ./eval.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal_ctx459_sem_seg_val",) + +OUTPUT_DIR: ./eval/pc459 \ No newline at end of file diff --git a/configs/mixed-mask-training/maftp/semantic/eval_pc59.yaml b/configs/mixed-mask-training/maftp/semantic/eval_pc59.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d034def2d89c483db1680b5b91e44273c430c27 --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/eval_pc59.yaml @@ -0,0 +1,12 @@ +_BASE_: ./eval.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal_ctx59_sem_seg_val",) + +OUTPUT_DIR: ./eval/pc59 \ No newline at end of file diff --git a/configs/mixed-mask-training/maftp/semantic/train_semantic_base_eval_a150.yaml b/configs/mixed-mask-training/maftp/semantic/train_semantic_base_eval_a150.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f0c7abd335b20f7ee70b147c1f64a450d7a7b7b --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/train_semantic_base_eval_a150.yaml @@ -0,0 +1,50 @@ +# python train_net.py --config-file configs/semantic/train_semantic_base.yaml --num-gpus 8 + +_BASE_: ../maskformer2_R50_bs16_50ep.yaml +MODEL: + META_ARCHITECTURE: "MAFT_Plus" # FCCLIP MAFT_Plus + SEM_SEG_HEAD: + NAME: "FCCLIPHead" + NUM_CLASSES: 171 + MASK_ADAPTER: + NAME: "MASKAdapterHead" + MASK_IN_CHANNELS: 16 + NUM_CHANNELS: 768 + USE_CHECKPOINT: False + NUM_OUTPUT_MAPS: 16 + MASK_THRESHOLD: 0.5 + FC_CLIP: + CLIP_MODEL_NAME: "convnext_base_w_320" + CLIP_PRETRAINED_WEIGHTS: "laion_aesthetic_s13b_b82k_augreg" + EMBED_DIM: 640 + GEOMETRIC_ENSEMBLE_ALPHA: 0.7 + GEOMETRIC_ENSEMBLE_BETA: 1.0 + rc_weights: 0.1 + MASK_FORMER: + TEST: + SEMANTIC_ON: True + INSTANCE_ON: False + PANOPTIC_ON: False + OBJECT_MASK_THRESHOLD: 0.0 + cdt_params: + - 640 + - 8 + +INPUT: + DATASET_MAPPER_NAME: "mask_former_semantic" # 
mask_former_semantic coco_panoptic_lsj +DATASETS: + TRAIN: ("openvocab_coco_2017_train_stuff_sem_seg",) + TEST: ('openvocab_ade20k_panoptic_val',) + +SOLVER: + IMS_PER_BATCH: 24 + BASE_LR: 0.0001 + STEPS: (43371, 47314) + MAX_ITER: 49286 + CHECKPOINT_PERIOD: 2500 +TEST: + EVAL_PERIOD: 2500 +INPUT: + DATASET_MAPPER_NAME: "mask_former_semantic" # +OUTPUT_DIR: ../evaluation/maftp-base/ade20k + \ No newline at end of file diff --git a/configs/mixed-mask-training/maftp/semantic/train_semantic_large_eval_a150.yaml b/configs/mixed-mask-training/maftp/semantic/train_semantic_large_eval_a150.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f56b3d79ffcbae811da7d881d15974a311b8306 --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/train_semantic_large_eval_a150.yaml @@ -0,0 +1,46 @@ +# python train_net.py --config-file configs/semantic/train_semantic_large.yaml --num-gpus 8 + +_BASE_: ../maskformer2_R50_bs16_50ep.yaml +MODEL: + META_ARCHITECTURE: "MAFT_Plus" # FCCLIP MAFT_Plus + SEM_SEG_HEAD: + NAME: "FCCLIPHead" + NUM_CLASSES: 171 + MASK_ADAPTER: + NAME: "MASKAdapterHead" + MASK_IN_CHANNELS: 16 + NUM_CHANNELS: 768 + USE_CHECKPOINT: False + NUM_OUTPUT_MAPS: 16 + MASK_THRESHOLD: 0.5 + FC_CLIP: + CLIP_MODEL_NAME: "convnext_large_d_320" + CLIP_PRETRAINED_WEIGHTS: "laion2b_s29b_b131k_ft_soup" + EMBED_DIM: 768 + GEOMETRIC_ENSEMBLE_ALPHA: 0.8 + GEOMETRIC_ENSEMBLE_BETA: 1.0 + rc_weights: 0.1 + MASK_FORMER: + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OBJECT_MASK_THRESHOLD: 0.0 + +SOLVER: + IMS_PER_BATCH: 24 + BASE_LR: 0.0001 + STEPS: (43371, 47314) + MAX_ITER: 49286 + CHECKPOINT_PERIOD: 2500 +TEST: + EVAL_PERIOD: 2500 +INPUT: + DATASET_MAPPER_NAME: "mask_former_semantic" # mask_former_semantic coco_panoptic_lsj +DATASETS: + TRAIN: ("openvocab_coco_2017_train_stuff_sem_seg",) # openvocab_coco_2017_train_panoptic_with_sem_seg + TEST: ('openvocab_ade20k_panoptic_val',) + + + +OUTPUT_DIR: ../evaluation/maftp-large/ade20k diff --git a/demo/demo.py b/demo/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..129d7f100c5ab22d259206e55c97b18dbe3ca49b --- /dev/null +++ b/demo/demo.py @@ -0,0 +1,201 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. 
+ +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/demo/demo.py +""" + +import argparse +import glob +import multiprocessing as mp +import os + +# fmt: off +import sys +sys.path.insert(1, os.path.join(sys.path[0], '..')) +# fmt: on + +import tempfile +import time +import warnings + +import cv2 +import numpy as np +import tqdm + +from detectron2.config import get_cfg +from detectron2.data.detection_utils import read_image +from detectron2.projects.deeplab import add_deeplab_config +from detectron2.utils.logger import setup_logger + +from fcclip import add_maskformer2_config, add_fcclip_config, add_mask_adapter_config +from predictor import VisualizationDemo + + +# constants +WINDOW_NAME = "mask-adapter demo" + + +def setup_cfg(args): + # load config from file and command-line arguments + cfg = get_cfg() + add_deeplab_config(cfg) + add_maskformer2_config(cfg) + add_fcclip_config(cfg) + add_mask_adapter_config(cfg) + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + return cfg + + +def get_parser(): + parser = argparse.ArgumentParser(description="mask-adapter demo for builtin configs") + parser.add_argument( + "--config-file", + default="configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_ade20k.yaml", + metavar="FILE", + help="path to config file", + ) + parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.") + parser.add_argument("--video-input", help="Path to video file.") + parser.add_argument( + "--input", + nargs="+", + help="A list of space separated input images; " + "or a single glob pattern such as 'directory/*.jpg'", + ) + parser.add_argument( + "--output", + help="A file or directory to save output visualizations. " + "If not given, will show output in an OpenCV window.", + ) + + parser.add_argument( + "--confidence-threshold", + type=float, + default=0.5, + help="Minimum score for instance predictions to be shown", + ) + parser.add_argument( + "--opts", + help="Modify config options using the command-line 'KEY VALUE' pairs", + default=[], + nargs=argparse.REMAINDER, + ) + return parser + + +def test_opencv_video_format(codec, file_ext): + with tempfile.TemporaryDirectory(prefix="video_format_test") as dir: + filename = os.path.join(dir, "test_file" + file_ext) + writer = cv2.VideoWriter( + filename=filename, + fourcc=cv2.VideoWriter_fourcc(*codec), + fps=float(30), + frameSize=(10, 10), + isColor=True, + ) + [writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)] + writer.release() + if os.path.isfile(filename): + return True + return False + + +if __name__ == "__main__": + mp.set_start_method("spawn", force=True) + args = get_parser().parse_args() + setup_logger(name="fvcore") + logger = setup_logger() + logger.info("Arguments: " + str(args)) + + cfg = setup_cfg(args) + + demo = VisualizationDemo(cfg) + + if args.input: + if len(args.input) == 1: + args.input = glob.glob(os.path.expanduser(args.input[0])) + assert args.input, "The input path(s) was not found" + for path in tqdm.tqdm(args.input, disable=not args.output): + # use PIL, to be consistent with evaluation + img = read_image(path, format="BGR") + start_time = time.time() + predictions, visualized_output = demo.run_on_image(img) + logger.info( + "{}: {} in {:.2f}s".format( + path, + "detected {} instances".format(len(predictions["instances"])) + if "instances" in predictions + else "finished", + time.time() - start_time, + ) + ) + + if args.output: + if os.path.isdir(args.output): + assert 
os.path.isdir(args.output), args.output + out_filename = os.path.join(args.output, os.path.basename(path)) + else: + assert len(args.input) == 1, "Please specify a directory with args.output" + out_filename = args.output + visualized_output.save(out_filename) + else: + cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) + cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1]) + if cv2.waitKey(0) == 27: + break # esc to quit + elif args.webcam: + assert args.input is None, "Cannot have both --input and --webcam!" + assert args.output is None, "output not yet supported with --webcam!" + cam = cv2.VideoCapture(0) + for vis in tqdm.tqdm(demo.run_on_video(cam)): + cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) + cv2.imshow(WINDOW_NAME, vis) + if cv2.waitKey(1) == 27: + break # esc to quit + cam.release() + cv2.destroyAllWindows() + elif args.video_input: + video = cv2.VideoCapture(args.video_input) + width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + frames_per_second = video.get(cv2.CAP_PROP_FPS) + num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + basename = os.path.basename(args.video_input) + codec, file_ext = ( + ("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4") + ) + if codec == ".mp4v": + warnings.warn("x264 codec not available, switching to mp4v") + if args.output: + if os.path.isdir(args.output): + output_fname = os.path.join(args.output, basename) + output_fname = os.path.splitext(output_fname)[0] + file_ext + else: + output_fname = args.output + assert not os.path.isfile(output_fname), output_fname + output_file = cv2.VideoWriter( + filename=output_fname, + # some installation of opencv may not support x264 (due to its license), + # you can try other format (e.g. 
MPEG) + fourcc=cv2.VideoWriter_fourcc(*codec), + fps=float(frames_per_second), + frameSize=(width, height), + isColor=True, + ) + assert os.path.isfile(args.video_input) + for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames): + if args.output: + output_file.write(vis_frame) + else: + cv2.namedWindow(basename, cv2.WINDOW_NORMAL) + cv2.imshow(basename, vis_frame) + if cv2.waitKey(1) == 27: + break # esc to quit + video.release() + if args.output: + output_file.release() + else: + cv2.destroyAllWindows() diff --git a/demo/images/000000000605.jpg b/demo/images/000000000605.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d3e4314159d7b936fcc2fd23e2ade02a70d6e97b Binary files /dev/null and b/demo/images/000000000605.jpg differ diff --git a/demo/images/000000001025.jpg b/demo/images/000000001025.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0604f04d6460750dc93e0ef18b09fbacc0ae5a0d Binary files /dev/null and b/demo/images/000000001025.jpg differ diff --git a/demo/images/000000290833.jpg b/demo/images/000000290833.jpg new file mode 100644 index 0000000000000000000000000000000000000000..15e77931d90aec95ac9f3559da0142818bce3d1d Binary files /dev/null and b/demo/images/000000290833.jpg differ diff --git a/demo/images/ADE_val_00000739.jpg b/demo/images/ADE_val_00000739.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b759cdd0134155b66dfb5b6f6efa7faee53006c8 Binary files /dev/null and b/demo/images/ADE_val_00000739.jpg differ diff --git a/demo/images/ADE_val_00000979.jpg b/demo/images/ADE_val_00000979.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fa9e29e5f3228f5d7a053399d50004b2a4049177 Binary files /dev/null and b/demo/images/ADE_val_00000979.jpg differ diff --git a/demo/images/ADE_val_00001200.jpg b/demo/images/ADE_val_00001200.jpg new file mode 100644 index 0000000000000000000000000000000000000000..910d9277a95eec22b73cf66cebd0d7099e5f0210 Binary files /dev/null and b/demo/images/ADE_val_00001200.jpg differ diff --git a/demo/predictor.py b/demo/predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..48016d1ae44dffc1c7ed390c20b104031e1d2c7c --- /dev/null +++ b/demo/predictor.py @@ -0,0 +1,280 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/demo/predictor.py +""" + +import atexit +import bisect +import multiprocessing as mp +from collections import deque + +import cv2 +import torch +import itertools + + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.engine.defaults import DefaultPredictor as d2_defaultPredictor +from detectron2.utils.video_visualizer import VideoVisualizer +from detectron2.utils.visualizer import ColorMode, Visualizer, random_color +import detectron2.utils.visualizer as d2_visualizer + + +class DefaultPredictor(d2_defaultPredictor): + + def set_metadata(self, metadata): + self.model.set_metadata(metadata) + + +class OpenVocabVisualizer(Visualizer): + def draw_panoptic_seg(self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7): + """ + Draw panoptic prediction annotations or results. + + Args: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each + segment. + segments_info (list[dict] or None): Describe each segment in `panoptic_seg`. 
+ If it is a ``list[dict]``, each dict contains keys "id", "category_id". + If None, category id of each pixel is computed by + ``pixel // metadata.label_divisor``. + area_threshold (int): stuff segments with less than `area_threshold` are not drawn. + + Returns: + output (VisImage): image object with visualizations. + """ + pred = d2_visualizer._PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.reset_image(self._create_grayscale_image(pred.non_empty_mask())) + # draw mask for all semantic segments first i.e. "stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + text = self.metadata.stuff_classes[category_idx].split(',')[0] + self.draw_binary_mask( + mask, + color=mask_color, + edge_color=d2_visualizer._OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + # draw mask for all instances second + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return self.output + masks, sinfo = list(zip(*all_instances)) + category_ids = [x["category_id"] for x in sinfo] + + try: + scores = [x["score"] for x in sinfo] + except KeyError: + scores = None + stuff_classes = self.metadata.stuff_classes + stuff_classes = [x.split(',')[0] for x in stuff_classes] + labels = d2_visualizer._create_text_labels( + category_ids, scores, stuff_classes, [x.get("iscrowd", 0) for x in sinfo] + ) + + try: + colors = [ + self._jitter([x / 255 for x in self.metadata.stuff_colors[c]]) for c in category_ids + ] + except AttributeError: + colors = None + self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) + + return self.output + + +class VisualizationDemo(object): + def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): + """ + Args: + cfg (CfgNode): + instance_mode (ColorMode): + parallel (bool): whether to run the model in different processes from visualization. + Useful since the visualization logic can be slow. 
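+        The merged COCO + ADE20K + LVIS vocabulary is registered as "openvocab_dataset" and attached to the predictor's metadata.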
+ """ + + coco_metadata = MetadataCatalog.get("openvocab_coco_2017_val_panoptic_with_sem_seg") + ade20k_metadata = MetadataCatalog.get("openvocab_ade20k_panoptic_val") + lvis_classes = open("./fcclip/data/datasets/lvis_1203_with_prompt_eng.txt", 'r').read().splitlines() + lvis_classes = [x[x.find(':')+1:] for x in lvis_classes] + lvis_colors = list( + itertools.islice(itertools.cycle(coco_metadata.stuff_colors), len(lvis_classes)) + ) + # rerrange to thing_classes, stuff_classes + coco_thing_classes = coco_metadata.thing_classes + coco_stuff_classes = [x for x in coco_metadata.stuff_classes if x not in coco_thing_classes] + coco_thing_colors = coco_metadata.thing_colors + coco_stuff_colors = [x for x in coco_metadata.stuff_colors if x not in coco_thing_colors] + ade20k_thing_classes = ade20k_metadata.thing_classes + ade20k_stuff_classes = [x for x in ade20k_metadata.stuff_classes if x not in ade20k_thing_classes] + ade20k_thing_colors = ade20k_metadata.thing_colors + ade20k_stuff_colors = [x for x in ade20k_metadata.stuff_colors if x not in ade20k_thing_colors] + + user_classes = [] + user_colors = [random_color(rgb=True, maximum=1) for _ in range(len(user_classes))] + + stuff_classes = coco_stuff_classes + ade20k_stuff_classes + stuff_colors = coco_stuff_colors + ade20k_stuff_colors + thing_classes = user_classes + coco_thing_classes + ade20k_thing_classes + lvis_classes + thing_colors = user_colors + coco_thing_colors + ade20k_thing_colors + lvis_colors + + thing_dataset_id_to_contiguous_id = {x: x for x in range(len(thing_classes))} + DatasetCatalog.register( + "openvocab_dataset", lambda x: [] + ) + self.metadata = MetadataCatalog.get("openvocab_dataset").set( + stuff_classes=thing_classes+stuff_classes, + stuff_colors=thing_colors+stuff_colors, + thing_dataset_id_to_contiguous_id=thing_dataset_id_to_contiguous_id, + ) + #print("self.metadata:", self.metadata) + self.cpu_device = torch.device("cpu") + self.instance_mode = instance_mode + + self.parallel = parallel + if parallel: + num_gpu = torch.cuda.device_count() + self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) + else: + self.predictor = DefaultPredictor(cfg) + self.predictor.set_metadata(self.metadata) + + def run_on_image(self, image): + """ + Args: + image (np.ndarray): an image of shape (H, W, C) (in BGR order). + This is the format used by OpenCV. + Returns: + predictions (dict): the output of the model. + vis_output (VisImage): the visualized image output. + """ + vis_output = None + predictions = self.predictor(image) + # Convert image from OpenCV BGR format to Matplotlib RGB format. + image = image[:, :, ::-1] + visualizer = OpenVocabVisualizer(image, self.metadata, instance_mode=self.instance_mode) + if "panoptic_seg" in predictions: + panoptic_seg, segments_info = predictions["panoptic_seg"] + vis_output = visualizer.draw_panoptic_seg( + panoptic_seg.to(self.cpu_device), segments_info + ) + else: + if "sem_seg" in predictions: + vis_output = visualizer.draw_sem_seg( + predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) + ) + if "instances" in predictions: + instances = predictions["instances"].to(self.cpu_device) + vis_output = visualizer.draw_instance_predictions(predictions=instances) + + return predictions, vis_output + + def _frame_from_video(self, video): + while video.isOpened(): + success, frame = video.read() + if success: + yield frame + else: + break + + +class AsyncPredictor: + """ + A predictor that runs the model asynchronously, possibly on >1 GPUs. 
+ Because rendering the visualization takes considerably amount of time, + this helps improve throughput a little bit when rendering videos. + """ + + class _StopToken: + pass + + class _PredictWorker(mp.Process): + def __init__(self, cfg, task_queue, result_queue): + self.cfg = cfg + self.task_queue = task_queue + self.result_queue = result_queue + super().__init__() + + def run(self): + predictor = DefaultPredictor(self.cfg) + + while True: + task = self.task_queue.get() + if isinstance(task, AsyncPredictor._StopToken): + break + idx, data = task + result = predictor(data) + self.result_queue.put((idx, result)) + + def __init__(self, cfg, num_gpus: int = 1): + """ + Args: + cfg (CfgNode): + num_gpus (int): if 0, will run on CPU + """ + num_workers = max(num_gpus, 1) + self.task_queue = mp.Queue(maxsize=num_workers * 3) + self.result_queue = mp.Queue(maxsize=num_workers * 3) + self.procs = [] + for gpuid in range(max(num_gpus, 1)): + cfg = cfg.clone() + cfg.defrost() + cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu" + self.procs.append( + AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue) + ) + + self.put_idx = 0 + self.get_idx = 0 + self.result_rank = [] + self.result_data = [] + + for p in self.procs: + p.start() + atexit.register(self.shutdown) + + def put(self, image): + self.put_idx += 1 + self.task_queue.put((self.put_idx, image)) + + def get(self): + self.get_idx += 1 # the index needed for this request + if len(self.result_rank) and self.result_rank[0] == self.get_idx: + res = self.result_data[0] + del self.result_data[0], self.result_rank[0] + return res + + while True: + # make sure the results are returned in the correct order + idx, res = self.result_queue.get() + if idx == self.get_idx: + return res + insert = bisect.bisect(self.result_rank, idx) + self.result_rank.insert(insert, idx) + self.result_data.insert(insert, res) + + def __len__(self): + return self.put_idx - self.get_idx + + def __call__(self, image): + self.put(image) + return self.get() + + def shutdown(self): + for _ in self.procs: + self.task_queue.put(AsyncPredictor._StopToken()) + + @property + def default_buffer_size(self): + return len(self.procs) * 5 \ No newline at end of file diff --git a/mask_adapter/.DS_Store b/mask_adapter/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..23d90bd83fecfc358450f7d9b9457ff2ebcc3d3b Binary files /dev/null and b/mask_adapter/.DS_Store differ diff --git a/mask_adapter/__init__.py b/mask_adapter/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..27ba9568cd683c7317227aadb9067f431fcb32ed --- /dev/null +++ b/mask_adapter/__init__.py @@ -0,0 +1,44 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from . import data # register all new datasets +from . 
import modeling + +# config +from .config import add_maskformer2_config, add_fcclip_config, add_mask_adapter_config + +# dataset loading +from .data.dataset_mappers.coco_instance_new_baseline_dataset_mapper import COCOInstanceNewBaselineDatasetMapper +from .data.dataset_mappers.coco_panoptic_new_baseline_dataset_mapper import COCOPanopticNewBaselineDatasetMapper +#from .data.dataset_mappers.grand_new_baseline_dataset_mapper import GrandNewBaselineDatasetMapper +from .data.dataset_mappers.mask_former_instance_dataset_mapper import ( + MaskFormerInstanceDatasetMapper, +) +from .data.dataset_mappers.mask_former_panoptic_dataset_mapper import ( + MaskFormerPanopticDatasetMapper, +) +from .data.dataset_mappers.mask_former_semantic_dataset_mapper import ( + MaskFormerSemanticDatasetMapper, +) +from .data.dataset_mappers.coco_combine_new_baseline_dataset_mapper import ( + COCOCombineNewBaselineDatasetMapper, +) +from .data.custom_dataset_dataloader import * +# models +from .mask_adapter import MASK_Adapter +from .test_time_augmentation import SemanticSegmentorWithTTA + +# evaluation +from .evaluation.instance_evaluation import InstanceSegEvaluator diff --git a/mask_adapter/__pycache__/__init__.cpython-310.pyc b/mask_adapter/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a68383bfbcd6cd6ef148a72c4ffaadbb0e7095ba Binary files /dev/null and b/mask_adapter/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/__pycache__/__init__.cpython-38.pyc b/mask_adapter/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6467faa4903a6be0f0b0dd97998faa2d129c8afa Binary files /dev/null and b/mask_adapter/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/__pycache__/config.cpython-310.pyc b/mask_adapter/__pycache__/config.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54510402cf668701ba6d50fe83c154e69c884ee7 Binary files /dev/null and b/mask_adapter/__pycache__/config.cpython-310.pyc differ diff --git a/mask_adapter/__pycache__/config.cpython-38.pyc b/mask_adapter/__pycache__/config.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dfa297e4d91a477058abdd30fe0b1dc98ef37c98 Binary files /dev/null and b/mask_adapter/__pycache__/config.cpython-38.pyc differ diff --git a/mask_adapter/__pycache__/fcclip.cpython-310.pyc b/mask_adapter/__pycache__/fcclip.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..def71d6f94aead039499a80cf72d9f7cb89b0f2d Binary files /dev/null and b/mask_adapter/__pycache__/fcclip.cpython-310.pyc differ diff --git a/mask_adapter/__pycache__/fcclip.cpython-38.pyc b/mask_adapter/__pycache__/fcclip.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4051d08d54cbec8ec3c127ab22550498940548df Binary files /dev/null and b/mask_adapter/__pycache__/fcclip.cpython-38.pyc differ diff --git a/mask_adapter/__pycache__/mask_adapter.cpython-310.pyc b/mask_adapter/__pycache__/mask_adapter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..258861281f394c58c5c083e84e07ca54376e71d4 Binary files /dev/null and b/mask_adapter/__pycache__/mask_adapter.cpython-310.pyc differ diff --git a/mask_adapter/__pycache__/mask_adapter.cpython-38.pyc b/mask_adapter/__pycache__/mask_adapter.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86def20808948a92baea923edd2e6d7a48a704e4 Binary 
files /dev/null and b/mask_adapter/__pycache__/mask_adapter.cpython-38.pyc differ diff --git a/mask_adapter/__pycache__/sam_maskadapter.cpython-310.pyc b/mask_adapter/__pycache__/sam_maskadapter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b0f8973415b4a1b638a55df3ca52ff628f5878f Binary files /dev/null and b/mask_adapter/__pycache__/sam_maskadapter.cpython-310.pyc differ diff --git a/mask_adapter/__pycache__/test_time_augmentation.cpython-310.pyc b/mask_adapter/__pycache__/test_time_augmentation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6628d22176ad796cf65018d0c93982ff08f80fb Binary files /dev/null and b/mask_adapter/__pycache__/test_time_augmentation.cpython-310.pyc differ diff --git a/mask_adapter/__pycache__/test_time_augmentation.cpython-38.pyc b/mask_adapter/__pycache__/test_time_augmentation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..de4d1c21df11d71bc2615c6e920ad8034f32c2c0 Binary files /dev/null and b/mask_adapter/__pycache__/test_time_augmentation.cpython-38.pyc differ diff --git a/mask_adapter/config.py b/mask_adapter/config.py new file mode 100644 index 0000000000000000000000000000000000000000..61e2a19116efa589bff31f5708618a7c3b4aaa49 --- /dev/null +++ b/mask_adapter/config.py @@ -0,0 +1,150 @@ +# -*- coding: utf-8 -*- +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/config.py +""" +from detectron2.config import CfgNode as CN + + +def add_maskformer2_config(cfg): + """ + Add config for MASK_FORMER. + """ + # NOTE: configs from original maskformer + # data config + # select the dataset mapper + cfg.INPUT.DATASET_MAPPER_NAME = "mask_former_semantic" + # Color augmentation + cfg.INPUT.COLOR_AUG_SSD = False + # We retry random cropping until no single category in semantic segmentation GT occupies more + # than `SINGLE_CATEGORY_MAX_AREA` part of the crop. + cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 + # Pad image and segmentation GT in dataset mapper. 
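+    # -1 disables padding in the mapper; batched inputs are padded by the model via MODEL.MASK_FORMER.SIZE_DIVISIBILITY instead.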
+ cfg.INPUT.SIZE_DIVISIBILITY = -1 + + # solver config + # weight decay on embedding + cfg.SOLVER.WEIGHT_DECAY_EMBED = 0.0 + # optimizer + cfg.SOLVER.OPTIMIZER = "ADAMW" + cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1 + + # mask_former model config + cfg.MODEL.MASK_FORMER = CN() + + # loss + cfg.MODEL.MASK_FORMER.DEEP_SUPERVISION = True + cfg.MODEL.MASK_FORMER.NO_OBJECT_WEIGHT = 0.1 + cfg.MODEL.MASK_FORMER.CLASS_WEIGHT = 1.0 + cfg.MODEL.MASK_FORMER.DICE_WEIGHT = 1.0 + cfg.MODEL.MASK_FORMER.MASK_WEIGHT = 20.0 + + # transformer config + cfg.MODEL.MASK_FORMER.NHEADS = 8 + cfg.MODEL.MASK_FORMER.DROPOUT = 0.1 + cfg.MODEL.MASK_FORMER.DIM_FEEDFORWARD = 2048 + cfg.MODEL.MASK_FORMER.ENC_LAYERS = 0 + cfg.MODEL.MASK_FORMER.DEC_LAYERS = 6 + cfg.MODEL.MASK_FORMER.PRE_NORM = False + + cfg.MODEL.MASK_FORMER.HIDDEN_DIM = 256 + cfg.MODEL.MASK_FORMER.NUM_OBJECT_QUERIES = 100 + + cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE = "res5" + cfg.MODEL.MASK_FORMER.ENFORCE_INPUT_PROJ = False + + # mask_former inference config + cfg.MODEL.MASK_FORMER.TEST = CN() + cfg.MODEL.MASK_FORMER.TEST.SEMANTIC_ON = True + cfg.MODEL.MASK_FORMER.TEST.INSTANCE_ON = False + cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON = False + cfg.MODEL.MASK_FORMER.TEST.OBJECT_MASK_THRESHOLD = 0.0 + cfg.MODEL.MASK_FORMER.TEST.OVERLAP_THRESHOLD = 0.0 + cfg.MODEL.MASK_FORMER.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False + + # Sometimes `backbone.size_divisibility` is set to 0 for some backbone (e.g. ResNet) + # you can use this config to override + cfg.MODEL.MASK_FORMER.SIZE_DIVISIBILITY = 32 + + # pixel decoder config + cfg.MODEL.SEM_SEG_HEAD.MASK_DIM = 256 + # adding transformer in pixel decoder + cfg.MODEL.SEM_SEG_HEAD.TRANSFORMER_ENC_LAYERS = 0 + # pixel decoder + cfg.MODEL.SEM_SEG_HEAD.PIXEL_DECODER_NAME = "BasePixelDecoder" + + # swin transformer backbone + cfg.MODEL.SWIN = CN() + cfg.MODEL.SWIN.PRETRAIN_IMG_SIZE = 224 + cfg.MODEL.SWIN.PATCH_SIZE = 4 + cfg.MODEL.SWIN.EMBED_DIM = 96 + cfg.MODEL.SWIN.DEPTHS = [2, 2, 6, 2] + cfg.MODEL.SWIN.NUM_HEADS = [3, 6, 12, 24] + cfg.MODEL.SWIN.WINDOW_SIZE = 7 + cfg.MODEL.SWIN.MLP_RATIO = 4.0 + cfg.MODEL.SWIN.QKV_BIAS = True + cfg.MODEL.SWIN.QK_SCALE = None + cfg.MODEL.SWIN.DROP_RATE = 0.0 + cfg.MODEL.SWIN.ATTN_DROP_RATE = 0.0 + cfg.MODEL.SWIN.DROP_PATH_RATE = 0.3 + cfg.MODEL.SWIN.APE = False + cfg.MODEL.SWIN.PATCH_NORM = True + cfg.MODEL.SWIN.OUT_FEATURES = ["res2", "res3", "res4", "res5"] + cfg.MODEL.SWIN.USE_CHECKPOINT = False + + # NOTE: maskformer2 extra configs + # transformer module + cfg.MODEL.MASK_FORMER.TRANSFORMER_DECODER_NAME = "MultiScaleMaskedTransformerDecoder" + + # LSJ aug + cfg.INPUT.IMAGE_SIZE = 1024 + cfg.INPUT.MIN_SCALE = 0.1 + cfg.INPUT.MAX_SCALE = 2.0 + + # MSDeformAttn encoder configs + cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = ["res3", "res4", "res5"] + cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS = 4 + cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS = 8 + + # point loss configs + # Number of points sampled during training for a mask point head. + cfg.MODEL.MASK_FORMER.TRAIN_NUM_POINTS = 112 * 112 + # Oversampling parameter for PointRend point sampling during training. Parameter `k` in the + # original paper. + cfg.MODEL.MASK_FORMER.OVERSAMPLE_RATIO = 3.0 + # Importance sampling parameter for PointRend point sampling during training. Parametr `beta` in + # the original paper. 
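+    # i.e. 75% of TRAIN_NUM_POINTS are taken from the most uncertain locations, the remaining 25% uniformly at random.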
+ cfg.MODEL.MASK_FORMER.IMPORTANCE_SAMPLE_RATIO = 0.75 + + +def add_fcclip_config(cfg): + # FC-CLIP model config + cfg.MODEL.FC_CLIP = CN() + cfg.MODEL.FC_CLIP.CLIP_MODEL_NAME = "convnext_large_d_320" + cfg.MODEL.FC_CLIP.CLIP_PRETRAINED_WEIGHTS = "laion2b_s29b_b131k_ft_soup" + cfg.MODEL.FC_CLIP.EMBED_DIM = 768 + cfg.MODEL.FC_CLIP.GEOMETRIC_ENSEMBLE_ALPHA = 0.4 + cfg.MODEL.FC_CLIP.GEOMETRIC_ENSEMBLE_BETA = 0.8 + cfg.MODEL.FC_CLIP.ENSEMBLE_ON_VALID_MASK = False + +def add_mask_adapter_config(cfg): + # Mask-Adapter model config + cfg.MODEL.MASK_ADAPTER = CN() + cfg.MODEL.MASK_ADAPTER.MASK_IN_CHANNELS = 16 + cfg.MODEL.MASK_ADAPTER.NUM_CHANNELS = 768 + cfg.MODEL.MASK_ADAPTER.USE_CHECKPOINT = False + cfg.MODEL.MASK_ADAPTER.NUM_OUTPUT_MAPS = 16 + + cfg.MODEL.MASK_ADAPTER.MASK_THRESHOLD = 0.45 + cfg.MODEL.MASK_ADAPTER.TRAIN_MAFT = False + + cfg.MODEL.MASK_ADAPTER.NAME = "MASKAdapterHead" + + cfg.DATALOADER.DATASET_RATIO = [1, 1] + cfg.DATALOADER.USE_DIFF_BS_SIZE = True + cfg.DATALOADER.DATASET_BS = [2, 2] + cfg.DATALOADER.USE_RFS = [False, False] + cfg.DATALOADER.MULTI_DATASET_GROUPING = True + cfg.DATALOADER.DATASET_ANN = ['box', 'box'] \ No newline at end of file diff --git a/mask_adapter/data/.DS_Store b/mask_adapter/data/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..e829afd79ec3201bfb6e7e9a3053eb81f794d5de Binary files /dev/null and b/mask_adapter/data/.DS_Store differ diff --git a/mask_adapter/data/__init__.py b/mask_adapter/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..54e2ec93d0494174518bc86d5769cea98fab0ea7 --- /dev/null +++ b/mask_adapter/data/__init__.py @@ -0,0 +1,16 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from . 
import datasets diff --git a/mask_adapter/data/__pycache__/__init__.cpython-310.pyc b/mask_adapter/data/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5815d02b3f2443bbda74be2f0f61055f35e1e397 Binary files /dev/null and b/mask_adapter/data/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/data/__pycache__/__init__.cpython-38.pyc b/mask_adapter/data/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7668cd5f7aa13751de97807d432fa5b034dcb60b Binary files /dev/null and b/mask_adapter/data/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/data/__pycache__/custom_dataset_dataloader.cpython-310.pyc b/mask_adapter/data/__pycache__/custom_dataset_dataloader.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..164b526b9d98149f384ceb44a24826042ebf9626 Binary files /dev/null and b/mask_adapter/data/__pycache__/custom_dataset_dataloader.cpython-310.pyc differ diff --git a/mask_adapter/data/__pycache__/custom_dataset_dataloader.cpython-38.pyc b/mask_adapter/data/__pycache__/custom_dataset_dataloader.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fd0068d877e3af2db6fb677a32d80b8024a1d2a Binary files /dev/null and b/mask_adapter/data/__pycache__/custom_dataset_dataloader.cpython-38.pyc differ diff --git a/mask_adapter/data/custom_dataset_dataloader.py b/mask_adapter/data/custom_dataset_dataloader.py new file mode 100644 index 0000000000000000000000000000000000000000..38b6e7b060f4dff4c801d6b27845c0ae1529cc26 --- /dev/null +++ b/mask_adapter/data/custom_dataset_dataloader.py @@ -0,0 +1,331 @@ +# Copyright (c) 2024 ByteDance. All Rights Reserved. +# Part of the code is from https://github.com/xingyizhou/UniDet/blob/master/projects/UniDet/unidet/data/multi_dataset_dataloader.py (Apache-2.0 License) +import copy +import logging +import numpy as np +import operator +import torch +import torch.utils.data +import json +from detectron2.utils.comm import get_world_size +from detectron2.utils.logger import _log_api_usage, log_first_n + +from detectron2.config import configurable +from detectron2.data import samplers +from torch.utils.data.sampler import BatchSampler, Sampler +from detectron2.data.common import DatasetFromList, MapDataset +from detectron2.data.dataset_mapper import DatasetMapper +from detectron2.data.build import get_detection_dataset_dicts, build_batch_data_loader +from detectron2.data.samplers import TrainingSampler, RepeatFactorTrainingSampler +from detectron2.data.build import worker_init_reset_seed, print_instances_class_histogram +from detectron2.data.build import filter_images_with_only_crowd_annotations +from detectron2.data.build import filter_images_with_few_keypoints +from detectron2.data.build import check_metadata_consistency +from detectron2.data.catalog import MetadataCatalog, DatasetCatalog +from detectron2.utils import comm +import itertools +import math +from collections import defaultdict +from typing import Optional + + +def _custom_train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None): + sampler_name = cfg.DATALOADER.SAMPLER_TRAIN # "MultiDatasetSampler" + if 'MultiDataset' in sampler_name: # True + dataset_dicts = get_detection_dataset_dicts_with_source( + cfg.DATASETS.TRAIN, + filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, + min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE + if cfg.MODEL.KEYPOINT_ON else 0, + 
proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, + ) + else: # False + dataset_dicts = get_detection_dataset_dicts( + cfg.DATASETS.TRAIN, + filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, + min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE + if cfg.MODEL.KEYPOINT_ON else 0, + proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, + ) + + if mapper is None: # False + mapper = DatasetMapper(cfg, True) + + if sampler is not None: + pass + elif sampler_name == "TrainingSampler": # False + sampler = TrainingSampler(len(dataset)) + elif sampler_name == "MultiDatasetSampler": # True + sampler = MultiDatasetSampler( + dataset_dicts, + dataset_ratio = cfg.DATALOADER.DATASET_RATIO, + use_rfs = cfg.DATALOADER.USE_RFS, + dataset_ann = cfg.DATALOADER.DATASET_ANN, + repeat_threshold = cfg.DATALOADER.REPEAT_THRESHOLD, + ) + elif sampler_name == "RepeatFactorTrainingSampler": # False + repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency( + dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD + ) + sampler = RepeatFactorTrainingSampler(repeat_factors) + else: + raise ValueError("Unknown training sampler: {}".format(sampler_name)) + + return { + "dataset": dataset_dicts, + "sampler": sampler, + "mapper": mapper, + "total_batch_size": cfg.SOLVER.IMS_PER_BATCH, # 64 + "aspect_ratio_grouping": cfg.DATALOADER.ASPECT_RATIO_GROUPING, + "num_workers": cfg.DATALOADER.NUM_WORKERS, # 8 + 'multi_dataset_grouping': cfg.DATALOADER.MULTI_DATASET_GROUPING, # True + 'use_diff_bs_size': cfg.DATALOADER.USE_DIFF_BS_SIZE, # True + 'dataset_bs': cfg.DATALOADER.DATASET_BS, # [8, 32] + 'num_datasets': len(cfg.DATASETS.TRAIN) # 2 + } + + +@configurable(from_config=_custom_train_loader_from_config) +def build_custom_train_loader( + dataset, *, mapper, sampler, + total_batch_size=16, # 64 + aspect_ratio_grouping=True, + num_workers=0, # 8 + num_datasets=1, # 2 + multi_dataset_grouping=False, # True + use_diff_bs_size=False, # True + dataset_bs=[] # [8, 32] + ): + """ + Modified from detectron2.data.build.build_custom_train_loader, but supports + different samplers + """ + if isinstance(dataset, list): + dataset = DatasetFromList(dataset, copy=False) + if mapper is not None: # True + dataset = MapDataset(dataset, mapper) + if sampler is None: # False + sampler = TrainingSampler(len(dataset)) + assert isinstance(sampler, torch.utils.data.sampler.Sampler) + if multi_dataset_grouping: # True + return build_multi_dataset_batch_data_loader( + use_diff_bs_size, + dataset_bs, + dataset, + sampler, + total_batch_size, + num_datasets=num_datasets, + num_workers=num_workers, + ) + else: # False + return build_batch_data_loader( + dataset, + sampler, + total_batch_size, + aspect_ratio_grouping=aspect_ratio_grouping, + num_workers=num_workers, + ) + + +def build_multi_dataset_batch_data_loader( + use_diff_bs_size, dataset_bs, + dataset, sampler, total_batch_size, num_datasets, num_workers=0 +): + """ + """ + world_size = get_world_size() + assert ( + total_batch_size > 0 and total_batch_size % world_size == 0 + ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format( + total_batch_size, world_size + ) + + batch_size = total_batch_size // world_size + data_loader = torch.utils.data.DataLoader( + dataset, + sampler=sampler, + num_workers=num_workers, + batch_sampler=None, + collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements + worker_init_fn=worker_init_reset_seed, + ) # yield 
individual mapped dict + if use_diff_bs_size: + return DIFFMDAspectRatioGroupedDataset( + data_loader, dataset_bs, num_datasets) + else: + return MDAspectRatioGroupedDataset( + data_loader, batch_size, num_datasets) + + +def get_detection_dataset_dicts_with_source( + dataset_names, filter_empty=True, min_keypoints=0, proposal_files=None +): + assert len(dataset_names) + dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names] + for dataset_name, dicts in zip(dataset_names, dataset_dicts): + assert len(dicts), "Dataset '{}' is empty!".format(dataset_name) + + for source_id, (dataset_name, dicts) in \ + enumerate(zip(dataset_names, dataset_dicts)): + assert len(dicts), "Dataset '{}' is empty!".format(dataset_name) + for d in dicts: + d['dataset_source'] = source_id # add "dataset_source" to original dict + + if "annotations" in dicts[0]: + try: + class_names = MetadataCatalog.get(dataset_name).thing_classes + check_metadata_consistency("thing_classes", dataset_name) + print_instances_class_histogram(dicts, class_names) + except AttributeError: # class names are not available for this dataset + pass + + assert proposal_files is None + + dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts)) # connect multiple iterable objects to one + + has_instances = "annotations" in dataset_dicts[0] + if filter_empty and has_instances: + dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts) + if min_keypoints > 0 and has_instances: + dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints) + + return dataset_dicts + + +class MultiDatasetSampler(Sampler): + def __init__( + self, + dataset_dicts, + dataset_ratio, + use_rfs, # [True, False] + dataset_ann, + repeat_threshold=0.001, + seed: Optional[int] = None, + ): + """ + """ + sizes = [0 for _ in range(len(dataset_ratio))] + for d in dataset_dicts: + sizes[d['dataset_source']] += 1 # size of each dataset + print('dataset sizes', sizes) + self.sizes = sizes + assert len(dataset_ratio) == len(sizes), \ + 'length of dataset ratio {} should be equal to number if dataset {}'.format( + len(dataset_ratio), len(sizes) + ) + if seed is None: + seed = comm.shared_random_seed() # seed shared across all GPUs + self._seed = int(seed) + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + self.dataset_ids = torch.tensor( + [d['dataset_source'] for d in dataset_dicts], dtype=torch.long) + + dataset_weight = [torch.ones(s) * max(sizes) / s * r / sum(dataset_ratio) \ + for i, (r, s) in enumerate(zip(dataset_ratio, sizes))] + dataset_weight = torch.cat(dataset_weight) + + rfs_factors = [] + st = 0 + for i, s in enumerate(sizes): + if use_rfs[i]: + if dataset_ann[i] == 'box': + rfs_func = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency + else: + rfs_func = repeat_factors_from_tag_frequency + rfs_factor = rfs_func( + dataset_dicts[st: st + s], + repeat_thresh=repeat_threshold) + rfs_factor = rfs_factor * (s / rfs_factor.sum()) + else: + rfs_factor = torch.ones(s) + rfs_factors.append(rfs_factor) + st = st + s + rfs_factors = torch.cat(rfs_factors) + + self.weights = dataset_weight * rfs_factors # weights for each element in the dataset_dict + self.sample_epoch_size = len(self.weights) + + def __iter__(self): + start = self._rank + yield from itertools.islice( + self._infinite_indices(), start, None, self._world_size) # itertools.islice(iterable, start, stop[, step]) + + + def _infinite_indices(self): + g = torch.Generator() + g.manual_seed(self._seed) + 
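+ # Each pass draws `sample_epoch_size` indices with replacement, weighted by
+ # self.weights (dataset-ratio balancing multiplied by the per-image repeat
+ # factors), so smaller or rarer datasets are over-sampled; __iter__ then strides
+ # this stream by rank / world_size for distributed training.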
while True: + ids = torch.multinomial( + self.weights, self.sample_epoch_size, generator=g, + replacement=True) # randomly sample according to the given weights + nums = [(self.dataset_ids[ids] == i).sum().int().item() \ + for i in range(len(self.sizes))] + yield from ids + + +class MDAspectRatioGroupedDataset(torch.utils.data.IterableDataset): + def __init__(self, dataset, batch_size, num_datasets): + """ + """ + self.dataset = dataset + self.batch_size = batch_size + self._buckets = [[] for _ in range(2 * num_datasets)] # there are (2 x num_datasets) types of data. For each dataset, there are two types: w>h or w<=h + + def __iter__(self): + for d in self.dataset: + w, h = d["width"], d["height"] + aspect_ratio_bucket_id = 0 if w > h else 1 + bucket_id = d['dataset_source'] * 2 + aspect_ratio_bucket_id + bucket = self._buckets[bucket_id] + bucket.append(d) + if len(bucket) == self.batch_size: + yield bucket[:] + del bucket[:] + + +class DIFFMDAspectRatioGroupedDataset(torch.utils.data.IterableDataset): + def __init__(self, dataset, batch_sizes, num_datasets): + """ + """ + self.dataset = dataset + self.batch_sizes = batch_sizes + self._buckets = [[] for _ in range(2 * num_datasets)] + + def __iter__(self): + for d in self.dataset: + w, h = d["width"], d["height"] + aspect_ratio_bucket_id = 0 if w > h else 1 + bucket_id = d['dataset_source'] * 2 + aspect_ratio_bucket_id + bucket = self._buckets[bucket_id] + bucket.append(d) + if len(bucket) == self.batch_sizes[d['dataset_source']]: # allow different batchsizes + yield bucket[:] + del bucket[:] + + +def repeat_factors_from_tag_frequency(dataset_dicts, repeat_thresh): + """ + """ + category_freq = defaultdict(int) + for dataset_dict in dataset_dicts: + cat_ids = dataset_dict['pos_category_ids'] + for cat_id in cat_ids: + category_freq[cat_id] += 1 + num_images = len(dataset_dicts) + for k, v in category_freq.items(): + category_freq[k] = v / num_images + + category_rep = { + cat_id: max(1.0, math.sqrt(repeat_thresh / cat_freq)) + for cat_id, cat_freq in category_freq.items() + } + + rep_factors = [] + for dataset_dict in dataset_dicts: + cat_ids = dataset_dict['pos_category_ids'] + rep_factor = max({category_rep[cat_id] for cat_id in cat_ids}, default=1.0) + rep_factors.append(rep_factor) + + return torch.tensor(rep_factors, dtype=torch.float32) \ No newline at end of file diff --git a/mask_adapter/data/dataset_mappers/__init__.py b/mask_adapter/data/dataset_mappers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be75f0cd9568f901b3174ecfb43c0b9f4fa1f77d --- /dev/null +++ b/mask_adapter/data/dataset_mappers/__init__.py @@ -0,0 +1,15 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" \ No newline at end of file diff --git a/mask_adapter/data/dataset_mappers/__pycache__/__init__.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d54179f1819d7559f03936ad60f3219494fcb338 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/__init__.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c1c994f03aa9bcc090b9bdc75572b100ce5cb665 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/coco_combine_new_baseline_dataset_mapper.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/coco_combine_new_baseline_dataset_mapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f7b17c8d22cc19161c434afdd8b8c8c4833b357e Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/coco_combine_new_baseline_dataset_mapper.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/coco_combine_new_baseline_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/coco_combine_new_baseline_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..640b5d968f097bcb6832b990fc867d3a78f8b855 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/coco_combine_new_baseline_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54077cec075535f2b14ec46eee15782d5149cf67 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c00231b0f41e69b45dab1723618ca5a0d44ff88 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27ff8c7a33ebdc431d820c32f5634eb1f0821c3a Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..23d5f0d516bebe6b877b6b33d1326d2b1d961728 Binary files /dev/null and 
b/mask_adapter/data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/grand_new_baseline_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/grand_new_baseline_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34bb7029396c68226e2a4b8b684a1eb76ba93623 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/grand_new_baseline_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..305947bad044a823a0afeead17cb24ce2c61c93e Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c62df5711c56928ccc5a95d1ac5c8d232e5d2a99 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b072b17d37d3e9318e29009ca71cb6fd51c98c9 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..31a82d2e1c280547690a62ab81247d78d1fc9ddf Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80aed9d2102ec75e6e3e90cbbac05ac39e81e357 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa903478f69e45811b313a6812c0323dbe90e7eb Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/coco_combine_new_baseline_dataset_mapper.py b/mask_adapter/data/dataset_mappers/coco_combine_new_baseline_dataset_mapper.py new file mode 100644 index 
0000000000000000000000000000000000000000..9870117a8aa3119385635d8b0a91e5d9a549845b --- /dev/null +++ b/mask_adapter/data/dataset_mappers/coco_combine_new_baseline_dataset_mapper.py @@ -0,0 +1,237 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/mask_former_instance_dataset_mapper.py +""" + +import copy +import logging + +import numpy as np +import pycocotools.mask as mask_util +import torch +from torch.nn import functional as F +import time +from detectron2.config import configurable +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.projects.point_rend import ColorAugSSDTransform +from detectron2.structures import BitMasks, Instances, polygons_to_bitmask, BoxMode,Boxes +from PIL import Image + +__all__ = ["COCOCombineNewBaselineDatasetMapper"] + + +class COCOCombineNewBaselineDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by MaskFormer for instance segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + augmentations, + image_format, + size_divisibility, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. + size_divisibility: pad image size to be divisible by this value + """ + self.is_train = is_train + self.tfm_gens = augmentations + self.img_format = image_format + self.size_divisibility = size_divisibility + + + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[{self.__class__.__name__}] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train=True): + # Build augmentation + image_size = cfg.INPUT.IMAGE_SIZE + min_scale = cfg.INPUT.MIN_SCALE + max_scale = cfg.INPUT.MAX_SCALE + + augmentation = [] + + if cfg.INPUT.RANDOM_FLIP != "none": + augmentation.append( + T.RandomFlip( + horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", + vertical=cfg.INPUT.RANDOM_FLIP == "vertical", + ) + ) + augmentation.extend([ + T.ResizeScale( + min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size + ), + T.FixedSizeCrop(crop_size=(image_size, image_size)), + ]) + + if cfg.INPUT.COLOR_AUG_SSD: + augmentation.append(ColorAugSSDTransform(img_format=cfg.INPUT.FORMAT)) + + ret = { + "is_train": is_train, + "augmentations": augmentation, + "image_format": cfg.INPUT.FORMAT, + "size_divisibility": cfg.INPUT.SIZE_DIVISIBILITY, + } + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 
+ + Returns: + dict: a format that builtin models in detectron2 accept + """ + + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + image_shape_before = image.shape[:2] + utils.check_image_size(dataset_dict, image) + + aug_input = T.AugInput(image) + aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input) + image = aug_input.image + image_shape = image.shape[:2] + + # transform instnace masks + #assert "annotations" in dataset_dict + if "annotations" in dataset_dict : + + file_name = dataset_dict["file_name"] + if "obj365" in file_name: + dataset_dict["dataname"] = "objects365_v1_masktrain" + elif "stuff" in file_name: + dataset_dict["dataname"] = "openvocab_coco_2017_train_stuff_sem_seg" + else: + dataset_dict["dataname"] = "lvis_v1_train" + + for anno in dataset_dict["annotations"]: + anno.pop("keypoints", None) + + annos = [ + utils.transform_instance_annotations(obj, transforms, image.shape[:2]) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + + if len(annos): + assert "segmentation" in annos[0] + segms = [obj["segmentation"] for obj in annos] + masks = [] + for segm in segms: + + if isinstance(segm, list): + # polygon + masks.append(polygons_to_bitmask(segm, *image.shape[:2])) + elif isinstance(segm, dict): + # COCO RLE + masks.append(mask_util.decode(segm)) + elif isinstance(segm, np.ndarray): + assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( + segm.ndim + ) + assert dataset_dict["dataname"] == "objects365_v1_masktrain" + # mask array + masks.append(segm) + else: + raise ValueError( + "Cannot convert segmentation of type '{}' to BitMasks!" + "Supported types are: polygons as list[list[float] or ndarray]," + " COCO-style RLE as a dict, or a binary segmentation mask " + " in a 2D numpy array of shape HxW.".format(type(segm)) + ) + + # Pad image and segmentation label here! 
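+ # The HWC uint8 image becomes a CHW tensor and each binary mask a torch tensor;
+ # the actual padding up to SIZE_DIVISIBILITY (value 128 for the image, 0 for the
+ # masks) is applied further below, after the panoptic branch.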
+ image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + masks = [torch.from_numpy(np.ascontiguousarray(x)) for x in masks] + + classes = [int(obj["category_id"]) for obj in annos] + classes = torch.tensor(classes, dtype=torch.int64) + elif "pan_seg_file_name" in dataset_dict: + pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") + segments_info = dataset_dict["segments_info"] + + # apply the same transformation to panoptic segmentation + + pan_seg_gt = transforms.apply_segmentation(pan_seg_gt) + + from panopticapi.utils import rgb2id + + pan_seg_gt = rgb2id(pan_seg_gt) + + instances = Instances(image_shape) + classes = [] + masks = [] + for segment_info in segments_info: + class_id = segment_info["category_id"] + if not segment_info["iscrowd"]: + classes.append(class_id) + masks.append(pan_seg_gt == segment_info["id"]) + + classes = np.array(classes) + classes = torch.tensor(classes, dtype=torch.int64) + masks = [torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks] + image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + + + + if self.size_divisibility > 0: + image_size = (image.shape[-2], image.shape[-1]) + padding_size = [ + 0, + self.size_divisibility - image_size[1], + 0, + self.size_divisibility - image_size[0], + ] + # pad image + image = F.pad(image, padding_size, value=128).contiguous() + # pad mask + masks = [F.pad(x, padding_size, value=0).contiguous() for x in masks] + + image_shape = (image.shape[-2], image.shape[-1]) # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = image + + # Prepare per-category binary masks + instances = Instances(image_shape) + instances.gt_classes = classes + + #boxes = np.zeros((0, 4)) + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, image.shape[-2], image.shape[-1])) + else: + masks = BitMasks(torch.stack(masks)) + instances.gt_masks = masks #.tensor + + #instances.gt_boxes = Boxes(boxes) + dataset_dict["instances"] = instances + + return dataset_dict + diff --git a/mask_adapter/data/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py b/mask_adapter/data/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..2a7fc6f86a3f371f4f32bb35b2189b5837eb9f78 --- /dev/null +++ b/mask_adapter/data/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py @@ -0,0 +1,194 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. 
+ +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py +""" + +import copy +import logging + +import numpy as np +import torch + +from detectron2.config import configurable +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.data.transforms import TransformGen +from detectron2.structures import BitMasks, Instances + +from pycocotools import mask as coco_mask + +__all__ = ["COCOInstanceNewBaselineDatasetMapper"] + + +def convert_coco_poly_to_mask(segmentations, height, width): + masks = [] + for polygons in segmentations: + rles = coco_mask.frPyObjects(polygons, height, width) + mask = coco_mask.decode(rles) + if len(mask.shape) < 3: + mask = mask[..., None] + mask = torch.as_tensor(mask, dtype=torch.uint8) + mask = mask.any(dim=2) + masks.append(mask) + if masks: + masks = torch.stack(masks, dim=0) + else: + masks = torch.zeros((0, height, width), dtype=torch.uint8) + return masks + + +def build_transform_gen(cfg, is_train): + """ + Create a list of default :class:`Augmentation` from config. + Now it includes resizing and flipping. + Returns: + list[Augmentation] + """ + assert is_train, "Only support training augmentation" + image_size = cfg.INPUT.IMAGE_SIZE + min_scale = cfg.INPUT.MIN_SCALE + max_scale = cfg.INPUT.MAX_SCALE + + augmentation = [] + + if cfg.INPUT.RANDOM_FLIP != "none": + augmentation.append( + T.RandomFlip( + horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", + vertical=cfg.INPUT.RANDOM_FLIP == "vertical", + ) + ) + + augmentation.extend([ + T.ResizeScale( + min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size + ), + T.FixedSizeCrop(crop_size=(image_size, image_size)), + ]) + + return augmentation + + +# This is specifically designed for the COCO dataset. +class COCOInstanceNewBaselineDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by MaskFormer. + + This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + tfm_gens, + image_format, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + tfm_gens: data augmentation + image_format: an image format supported by :func:`detection_utils.read_image`. + """ + self.tfm_gens = tfm_gens + logging.getLogger(__name__).info( + "[COCOInstanceNewBaselineDatasetMapper] Full TransformGens used in training: {}".format(str(self.tfm_gens)) + ) + + self.img_format = image_format + self.is_train = is_train + + @classmethod + def from_config(cls, cfg, is_train=True): + # Build augmentation + tfm_gens = build_transform_gen(cfg, is_train) + + ret = { + "is_train": is_train, + "tfm_gens": tfm_gens, + "image_format": cfg.INPUT.FORMAT, + } + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 
+ + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + # TODO: get padding mask + # by feeding a "segmentation mask" to the same transforms + padding_mask = np.ones(image.shape[:2]) + + image, transforms = T.apply_transform_gens(self.tfm_gens, image) + # the crop transformation has default padding value 0 for segmentation + padding_mask = transforms.apply_segmentation(padding_mask) + padding_mask = ~ padding_mask.astype(bool) + + image_shape = image.shape[:2] # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + dataset_dict["padding_mask"] = torch.as_tensor(np.ascontiguousarray(padding_mask)) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. + dataset_dict.pop("annotations", None) + return dataset_dict + + if "annotations" in dataset_dict: + # USER: Modify this if you want to keep them for some reason. + for anno in dataset_dict["annotations"]: + # Let's always keep mask + # if not self.mask_on: + # anno.pop("segmentation", None) + anno.pop("keypoints", None) + + # USER: Implement additional transformations if you have other types of data + annos = [ + utils.transform_instance_annotations(obj, transforms, image_shape) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + # NOTE: does not support BitMask due to augmentation + # Current BitMask cannot handle empty objects + instances = utils.annotations_to_instances(annos, image_shape) + # After transforms such as cropping are applied, the bounding box may no longer + # tightly bound the object. As an example, imagine a triangle object + # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight + # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to + # the intersection of original bounding box and the cropping box. + instances.gt_boxes = instances.gt_masks.get_bounding_boxes() + # Need to filter empty instances first (due to augmentation) + instances = utils.filter_empty_instances(instances) + # Generate masks from polygon + h, w = instances.image_size + # image_size_xyxy = torch.as_tensor([w, h, w, h], dtype=torch.float) + if hasattr(instances, 'gt_masks'): + gt_masks = instances.gt_masks + gt_masks = convert_coco_poly_to_mask(gt_masks.polygons, h, w) + instances.gt_masks = gt_masks + dataset_dict["instances"] = instances + + return dataset_dict diff --git a/mask_adapter/data/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py b/mask_adapter/data/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..89e616a60434a1d66edd10e73f3c1d53438c6d8d --- /dev/null +++ b/mask_adapter/data/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py @@ -0,0 +1,170 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. 
+ +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py +""" + +import copy +import logging + +import numpy as np +import torch + +from detectron2.config import configurable +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.data.transforms import TransformGen +from detectron2.structures import BitMasks, Boxes, Instances + +__all__ = ["COCOPanopticNewBaselineDatasetMapper"] + + +def build_transform_gen(cfg, is_train): + """ + Create a list of default :class:`Augmentation` from config. + Now it includes resizing and flipping. + Returns: + list[Augmentation] + """ + assert is_train, "Only support training augmentation" + image_size = cfg.INPUT.IMAGE_SIZE + min_scale = cfg.INPUT.MIN_SCALE + max_scale = cfg.INPUT.MAX_SCALE + + augmentation = [] + + if cfg.INPUT.RANDOM_FLIP != "none": + augmentation.append( + T.RandomFlip( + horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", + vertical=cfg.INPUT.RANDOM_FLIP == "vertical", + ) + ) + + augmentation.extend([ + T.ResizeScale( + min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size + ), + T.FixedSizeCrop(crop_size=(image_size, image_size)), + ]) + + return augmentation + + +# This is specifically designed for the COCO dataset. +class COCOPanopticNewBaselineDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by MaskFormer. + + This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + tfm_gens, + image_format, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + crop_gen: crop augmentation + tfm_gens: data augmentation + image_format: an image format supported by :func:`detection_utils.read_image`. + """ + self.tfm_gens = tfm_gens + logging.getLogger(__name__).info( + "[COCOPanopticNewBaselineDatasetMapper] Full TransformGens used in training: {}".format( + str(self.tfm_gens) + ) + ) + + self.img_format = image_format + self.is_train = is_train + + @classmethod + def from_config(cls, cfg, is_train=True): + # Build augmentation + tfm_gens = build_transform_gen(cfg, is_train) + + ret = { + "is_train": is_train, + "tfm_gens": tfm_gens, + "image_format": cfg.INPUT.FORMAT, + } + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + image, transforms = T.apply_transform_gens(self.tfm_gens, image) + image_shape = image.shape[:2] # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. 
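+ # Store the transformed image; during training the panoptic PNG is then decoded
+ # with panopticapi's rgb2id and turned into per-segment binary masks and classes
+ # (crowd segments are skipped), with gt_boxes derived from the masks.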
+ dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. + dataset_dict.pop("annotations", None) + return dataset_dict + + if "pan_seg_file_name" in dataset_dict: + pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") + segments_info = dataset_dict["segments_info"] + + # apply the same transformation to panoptic segmentation + pan_seg_gt = transforms.apply_segmentation(pan_seg_gt) + + from panopticapi.utils import rgb2id + + pan_seg_gt = rgb2id(pan_seg_gt) + + instances = Instances(image_shape) + classes = [] + masks = [] + for segment_info in segments_info: + class_id = segment_info["category_id"] + if not segment_info["iscrowd"]: + classes.append(class_id) + masks.append(pan_seg_gt == segment_info["id"]) + + classes = np.array(classes) + instances.gt_classes = torch.tensor(classes, dtype=torch.int64) + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) + instances.gt_boxes = Boxes(torch.zeros((0, 4))) + else: + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) + ) + instances.gt_masks = masks.tensor + instances.gt_boxes = masks.get_bounding_boxes() + + dataset_dict["instances"] = instances + + return dataset_dict diff --git a/mask_adapter/data/dataset_mappers/mask_former_instance_dataset_mapper.py b/mask_adapter/data/dataset_mappers/mask_former_instance_dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..783cc86c1f87542b69e11db3464fd57afd89a086 --- /dev/null +++ b/mask_adapter/data/dataset_mappers/mask_former_instance_dataset_mapper.py @@ -0,0 +1,186 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/mask_former_instance_dataset_mapper.py +""" + +import copy +import logging + +import numpy as np +import pycocotools.mask as mask_util +import torch +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.projects.point_rend import ColorAugSSDTransform +from detectron2.structures import BitMasks, Instances, polygons_to_bitmask + +__all__ = ["MaskFormerInstanceDatasetMapper"] + + +class MaskFormerInstanceDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by MaskFormer for instance segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + augmentations, + image_format, + size_divisibility, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. 
+ size_divisibility: pad image size to be divisible by this value + """ + self.is_train = is_train + self.tfm_gens = augmentations + self.img_format = image_format + self.size_divisibility = size_divisibility + + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[{self.__class__.__name__}] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train=True): + # Build augmentation + augs = [ + T.ResizeShortestEdge( + cfg.INPUT.MIN_SIZE_TRAIN, + cfg.INPUT.MAX_SIZE_TRAIN, + cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING, + ) + ] + if cfg.INPUT.CROP.ENABLED: + augs.append( + T.RandomCrop( + cfg.INPUT.CROP.TYPE, + cfg.INPUT.CROP.SIZE, + ) + ) + if cfg.INPUT.COLOR_AUG_SSD: + augs.append(ColorAugSSDTransform(img_format=cfg.INPUT.FORMAT)) + augs.append(T.RandomFlip()) + + ret = { + "is_train": is_train, + "augmentations": augs, + "image_format": cfg.INPUT.FORMAT, + "size_divisibility": cfg.INPUT.SIZE_DIVISIBILITY, + } + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + assert self.is_train, "MaskFormerPanopticDatasetMapper should only be used for training!" + + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + aug_input = T.AugInput(image) + aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input) + image = aug_input.image + + # transform instnace masks + assert "annotations" in dataset_dict + for anno in dataset_dict["annotations"]: + anno.pop("keypoints", None) + + annos = [ + utils.transform_instance_annotations(obj, transforms, image.shape[:2]) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + + if len(annos): + assert "segmentation" in annos[0] + segms = [obj["segmentation"] for obj in annos] + masks = [] + for segm in segms: + if isinstance(segm, list): + # polygon + masks.append(polygons_to_bitmask(segm, *image.shape[:2])) + elif isinstance(segm, dict): + # COCO RLE + masks.append(mask_util.decode(segm)) + elif isinstance(segm, np.ndarray): + assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( + segm.ndim + ) + # mask array + masks.append(segm) + else: + raise ValueError( + "Cannot convert segmentation of type '{}' to BitMasks!" + "Supported types are: polygons as list[list[float] or ndarray]," + " COCO-style RLE as a dict, or a binary segmentation mask " + " in a 2D numpy array of shape HxW.".format(type(segm)) + ) + + # Pad image and segmentation label here! 
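+ # Image and per-instance masks are converted to torch tensors here; when
+ # SIZE_DIVISIBILITY is positive they are right/bottom-padded up to that size
+ # (image with value 128, masks with 0) before being packed into Instances.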
+ image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + masks = [torch.from_numpy(np.ascontiguousarray(x)) for x in masks] + + classes = [int(obj["category_id"]) for obj in annos] + classes = torch.tensor(classes, dtype=torch.int64) + + if self.size_divisibility > 0: + image_size = (image.shape[-2], image.shape[-1]) + padding_size = [ + 0, + self.size_divisibility - image_size[1], + 0, + self.size_divisibility - image_size[0], + ] + # pad image + image = F.pad(image, padding_size, value=128).contiguous() + # pad mask + masks = [F.pad(x, padding_size, value=0).contiguous() for x in masks] + + image_shape = (image.shape[-2], image.shape[-1]) # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = image + + # Prepare per-category binary masks + instances = Instances(image_shape) + instances.gt_classes = classes + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, image.shape[-2], image.shape[-1])) + else: + masks = BitMasks(torch.stack(masks)) + instances.gt_masks = masks.tensor + + dataset_dict["instances"] = instances + + return dataset_dict diff --git a/mask_adapter/data/dataset_mappers/mask_former_panoptic_dataset_mapper.py b/mask_adapter/data/dataset_mappers/mask_former_panoptic_dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..a3c618ee2ae78484cd7689df49dd481d5a19e94a --- /dev/null +++ b/mask_adapter/data/dataset_mappers/mask_former_panoptic_dataset_mapper.py @@ -0,0 +1,171 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/mask_former_panoptic_dataset_mapper.py +""" + +import copy +import logging + +import numpy as np +import torch +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.structures import BitMasks, Instances + +from .mask_former_semantic_dataset_mapper import MaskFormerSemanticDatasetMapper + +__all__ = ["MaskFormerPanopticDatasetMapper"] + + +class MaskFormerPanopticDatasetMapper(MaskFormerSemanticDatasetMapper): + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by MaskFormer for panoptic segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + augmentations, + image_format, + ignore_label, + size_divisibility, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. 
+ ignore_label: the label that is ignored to evaluation + size_divisibility: pad image size to be divisible by this value + """ + super().__init__( + is_train, + augmentations=augmentations, + image_format=image_format, + ignore_label=ignore_label, + size_divisibility=size_divisibility, + ) + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + assert self.is_train, "MaskFormerPanopticDatasetMapper should only be used for training!" + + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + # semantic segmentation + if "sem_seg_file_name" in dataset_dict: + # PyTorch transformation not implemented for uint16, so converting it to double first + sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name")).astype("double") + else: + sem_seg_gt = None + + # panoptic segmentation + if "pan_seg_file_name" in dataset_dict: + pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") + segments_info = dataset_dict["segments_info"] + else: + pan_seg_gt = None + segments_info = None + + if pan_seg_gt is None: + raise ValueError( + "Cannot find 'pan_seg_file_name' for panoptic segmentation dataset {}.".format( + dataset_dict["file_name"] + ) + ) + + aug_input = T.AugInput(image, sem_seg=sem_seg_gt) + aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input) + image = aug_input.image + if sem_seg_gt is not None: + sem_seg_gt = aug_input.sem_seg + + # apply the same transformation to panoptic segmentation + pan_seg_gt = transforms.apply_segmentation(pan_seg_gt) + + from panopticapi.utils import rgb2id + + pan_seg_gt = rgb2id(pan_seg_gt) + + # Pad image and segmentation label here! + image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + if sem_seg_gt is not None: + sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) + pan_seg_gt = torch.as_tensor(pan_seg_gt.astype("long")) + + if self.size_divisibility > 0: + image_size = (image.shape[-2], image.shape[-1]) + padding_size = [ + 0, + self.size_divisibility - image_size[1], + 0, + self.size_divisibility - image_size[0], + ] + image = F.pad(image, padding_size, value=128).contiguous() + if sem_seg_gt is not None: + sem_seg_gt = F.pad(sem_seg_gt, padding_size, value=self.ignore_label).contiguous() + pan_seg_gt = F.pad( + pan_seg_gt, padding_size, value=0 + ).contiguous() # 0 is the VOID panoptic label + + image_shape = (image.shape[-2], image.shape[-1]) # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. 
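+ # Below, the (already padded) image and semantic GT are stored, instance-style
+ # 'annotations' are rejected, and the panoptic id map is split into gt_classes
+ # plus one binary gt_mask per non-crowd segment.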
+ dataset_dict["image"] = image + if sem_seg_gt is not None: + dataset_dict["sem_seg"] = sem_seg_gt.long() + + if "annotations" in dataset_dict: + raise ValueError("Pemantic segmentation dataset should not have 'annotations'.") + + # Prepare per-category binary masks + pan_seg_gt = pan_seg_gt.numpy() + instances = Instances(image_shape) + classes = [] + masks = [] + for segment_info in segments_info: + class_id = segment_info["category_id"] + if not segment_info["iscrowd"]: + classes.append(class_id) + masks.append(pan_seg_gt == segment_info["id"]) + + classes = np.array(classes) + instances.gt_classes = torch.tensor(classes, dtype=torch.int64) + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) + else: + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) + ) + instances.gt_masks = masks.tensor + + dataset_dict["instances"] = instances + + return dataset_dict diff --git a/mask_adapter/data/dataset_mappers/mask_former_semantic_dataset_mapper.py b/mask_adapter/data/dataset_mappers/mask_former_semantic_dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..c16e1f3e68c9522f51df6fb1ca3663bdf0242df7 --- /dev/null +++ b/mask_adapter/data/dataset_mappers/mask_former_semantic_dataset_mapper.py @@ -0,0 +1,215 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/mask_former_semantic_dataset_mapper.py +""" + +import copy +import logging + +import numpy as np +import torch +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.data import MetadataCatalog +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.projects.point_rend import ColorAugSSDTransform +from detectron2.structures import BitMasks, Instances + +__all__ = ["MaskFormerSemanticDatasetMapper"] + + +class MaskFormerSemanticDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by MaskFormer for semantic segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + augmentations, + image_format, + ignore_label, + size_divisibility, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. 
+ ignore_label: the label that is ignored to evaluation + size_divisibility: pad image size to be divisible by this value + """ + self.is_train = is_train + self.tfm_gens = augmentations + self.img_format = image_format + self.ignore_label = ignore_label + self.size_divisibility = size_divisibility + + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[{self.__class__.__name__}] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train=True): + # Build augmentation + # augs = [ + # T.ResizeShortestEdge( + # cfg.INPUT.MIN_SIZE_TRAIN, + # cfg.INPUT.MAX_SIZE_TRAIN, + # cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING, + # ) + # ] + # if cfg.INPUT.CROP.ENABLED: + # augs.append( + # T.RandomCrop_CategoryAreaConstraint( + # cfg.INPUT.CROP.TYPE, + # cfg.INPUT.CROP.SIZE, + # cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA, + # cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, + # ) + # ) + # if cfg.INPUT.COLOR_AUG_SSD: + # augs.append(ColorAugSSDTransform(img_format=cfg.INPUT.FORMAT)) + # augs.append(T.RandomFlip()) + + image_size = cfg.INPUT.IMAGE_SIZE + min_scale = cfg.INPUT.MIN_SCALE + max_scale = cfg.INPUT.MAX_SCALE + + augmentation = [] + + if cfg.INPUT.RANDOM_FLIP != "none": + augmentation.append( + T.RandomFlip( + horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", + vertical=cfg.INPUT.RANDOM_FLIP == "vertical", + ) + ) + + augmentation.extend([ + T.ResizeScale( + min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size + ), + T.FixedSizeCrop(crop_size=(image_size, image_size)), + ]) + + # Assume always applies to the training set. + dataset_names = cfg.DATASETS.TRAIN + meta = MetadataCatalog.get(dataset_names[0]) + ignore_label = meta.ignore_label + + ret = { + "is_train": is_train, + "augmentations": augmentation, + "image_format": cfg.INPUT.FORMAT, + "ignore_label": ignore_label, + "size_divisibility": cfg.INPUT.SIZE_DIVISIBILITY, + } + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + assert self.is_train, "MaskFormerSemanticDatasetMapper should only be used for training!" + + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + file_name = dataset_dict["file_name"] + if "stuff" in file_name: + dataset_dict["dataname"] = "openvocab_coco_2017_train_stuff_sem_seg" + + if "sem_seg_file_name" in dataset_dict: + # PyTorch transformation not implemented for uint16, so converting it to double first + sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name")).astype("double") + else: + sem_seg_gt = None + + if sem_seg_gt is None: + raise ValueError( + "Cannot find 'sem_seg_file_name' for semantic segmentation dataset {}.".format( + dataset_dict["file_name"] + ) + ) + + aug_input = T.AugInput(image, sem_seg=sem_seg_gt) + aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input) + image = aug_input.image + sem_seg_gt = aug_input.sem_seg + + # Pad image and segmentation label here! 
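+ # The image becomes a CHW tensor and the semantic GT a long tensor; padding up to
+ # SIZE_DIVISIBILITY uses value 128 for the image and ignore_label for the GT, and
+ # the per-class binary masks below are built from the unique labels, with
+ # ignore_label excluded.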
+ image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + if sem_seg_gt is not None: + sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) + + if self.size_divisibility > 0: + image_size = (image.shape[-2], image.shape[-1]) + padding_size = [ + 0, + self.size_divisibility - image_size[1], + 0, + self.size_divisibility - image_size[0], + ] + image = F.pad(image, padding_size, value=128).contiguous() + if sem_seg_gt is not None: + sem_seg_gt = F.pad(sem_seg_gt, padding_size, value=self.ignore_label).contiguous() + + image_shape = (image.shape[-2], image.shape[-1]) # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = image + + if sem_seg_gt is not None: + dataset_dict["sem_seg"] = sem_seg_gt.long() + + if "annotations" in dataset_dict: + raise ValueError("Semantic segmentation dataset should not have 'annotations'.") + + # Prepare per-category binary masks + if sem_seg_gt is not None: + sem_seg_gt = sem_seg_gt.numpy() + instances = Instances(image_shape) + classes = np.unique(sem_seg_gt) + # remove ignored region + classes = classes[classes != self.ignore_label] + instances.gt_classes = torch.tensor(classes, dtype=torch.int64) + + masks = [] + for class_id in classes: + masks.append(sem_seg_gt == class_id) + + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, sem_seg_gt.shape[-2], sem_seg_gt.shape[-1])) + else: + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) + ) + instances.gt_masks = masks.tensor + + dataset_dict["instances"] = instances + + return dataset_dict diff --git a/mask_adapter/data/datasets/__init__.py b/mask_adapter/data/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f14dc2bbe3f53c9c50a7ee44d8bf7d324d3cd2d4 --- /dev/null +++ b/mask_adapter/data/datasets/__init__.py @@ -0,0 +1,35 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from . 
import ( + register_coco_panoptic_annos_semseg, + register_ade20k_panoptic, + register_cityscapes_panoptic, + register_mapillary_vistas_panoptic, + register_ade20k_full, + register_pascal_voc_20_semantic, + register_pascal_voc_21_semantic, + register_pascal_ctx_59_sem_seg, + register_pascal_ctx_459_sem_seg, + register_coco_instance, + register_ade20k_instance, + register_coco_stuff_164k, + #register_all_grand, + openseg_classes +) + +#from .register_grand_data import register_all_grand +# from .register_objects365 import register_all_obj365v1 \ No newline at end of file diff --git a/mask_adapter/data/datasets/__pycache__/__init__.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94a5f0c47c0535f01ba8f016d3abc7c9b8b8af29 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/__init__.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ffac200bf3f821b33e4d5b0854880a4a2cdf1599 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/openseg_classes.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/openseg_classes.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8ddb552db7b3eac4ac6f0d3037d3271bd4a12495 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/openseg_classes.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/openseg_classes.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/openseg_classes.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7007fd1f10f17e5bfed83cf1a138e22f43707a0c Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/openseg_classes.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_ade20k_full.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_ade20k_full.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bd1aec844dd1fe9d4a7153531c9f1d089a14aced Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_ade20k_full.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_ade20k_full.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_ade20k_full.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c1579db2e742ada067999124f32cc483d1233b07 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_ade20k_full.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_ade20k_instance.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_ade20k_instance.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2a478866cc396a93e04866f3a38e45f3bca45ad Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_ade20k_instance.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_ade20k_instance.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_ade20k_instance.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6dc80f3eb5b38d5736ae3acf60574128e7a26b7b Binary files /dev/null and 
b/mask_adapter/data/datasets/__pycache__/register_ade20k_instance.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_ade20k_panoptic.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_ade20k_panoptic.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d8077364b50cbe4e5f7345da63c96dc1012deab4 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_ade20k_panoptic.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_ade20k_panoptic.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_ade20k_panoptic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58673df0b561ec33a30751494a4f09d8b25f89c8 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_ade20k_panoptic.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_cityscapes_panoptic.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_cityscapes_panoptic.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8668d2e358c3edd3a8e36d89f8597a2414f0e4e8 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_cityscapes_panoptic.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_cityscapes_panoptic.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_cityscapes_panoptic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a95f6a4812bfb4aada31a43fd53926e376f19f4 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_cityscapes_panoptic.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_coco_instance.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_coco_instance.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff1e9d4994a9330d63974899731758000be55694 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_coco_instance.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_coco_instance.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_coco_instance.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b64e0cc3d8323139df05bd771dfc83386838e5e Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_coco_instance.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_coco_panoptic_annos_semseg.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_coco_panoptic_annos_semseg.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..844daa84e62659558cf04fbaf2073ec04713bb5b Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_coco_panoptic_annos_semseg.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_coco_panoptic_annos_semseg.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_coco_panoptic_annos_semseg.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b81eee86727769719fb1eafa42c58d8b02f4a400 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_coco_panoptic_annos_semseg.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_coco_stuff_164k.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_coco_stuff_164k.cpython-310.pyc new file mode 
100644 index 0000000000000000000000000000000000000000..9fadf4bc78a8e95843135bfa9d1c642eb0c69446 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_coco_stuff_164k.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_coco_stuff_164k.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_coco_stuff_164k.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7aef74e4819ec3e8e1a2f25962957af2b0a4ae47 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_coco_stuff_164k.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_grand_data.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_grand_data.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7697404d2867ab899fa2d1ced0f6e2c57c824e21 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_grand_data.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_mapillary_vistas_panoptic.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_mapillary_vistas_panoptic.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..320004295f5db22d597c24f66925f67b431ded57 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_mapillary_vistas_panoptic.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_mapillary_vistas_panoptic.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_mapillary_vistas_panoptic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fd8120f4b317a43cd6fe8af07d44658246bfe09 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_mapillary_vistas_panoptic.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_objects365.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_objects365.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b0b3850259c5af6344e7ebbc365c6ca10f25d57 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_objects365.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_objects365.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_objects365.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf7318f8c4a55b1670ebe58b2a5d5106c357e486 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_objects365.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_459_sem_seg.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_459_sem_seg.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1efc99f2fb5ee4b15022185d22c93e6588c64416 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_459_sem_seg.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_459_sem_seg.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_459_sem_seg.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1656d3617cf6767c95bf7a4aff7a588e0c6849c6 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_459_sem_seg.cpython-38.pyc differ diff --git 
a/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_59_sem_seg.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_59_sem_seg.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..89154682e470bb3f1718dcc1e33a3dbd1612dc11 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_59_sem_seg.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_59_sem_seg.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_59_sem_seg.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8e6ef8890da03a2376174e9d7537da846fa93f0 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_59_sem_seg.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_voc_20_semantic.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_20_semantic.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29ce945a2b27bb16fe34cd67548996f94402d21b Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_20_semantic.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_voc_20_semantic.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_20_semantic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e8d0552042ee1b1cf3884a4d5ea2a92ab6d9ea1 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_20_semantic.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_voc_21_semantic.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_21_semantic.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..966c9ed6043a6a5414809f46b7eceda554f43504 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_21_semantic.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_voc_21_semantic.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_21_semantic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc3e96834bde4721a8a96b506c73e42dc564c5c3 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_21_semantic.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/ade20k_150_with_prompt_eng.txt b/mask_adapter/data/datasets/ade20k_150_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..5226edfa39dd317be992f30c661185aa030c8180 --- /dev/null +++ b/mask_adapter/data/datasets/ade20k_150_with_prompt_eng.txt @@ -0,0 +1,151 @@ +0:invalid_class_id +1:wall,walls,brick wall,stone wall,interior wall +2:building,buildings,edifice,edifices +3:sky,clouds +4:floor,flooring +5:tree,trees +6:ceiling +7:road,route,street,roads,streets,routes +8:bed,beds +9:windowpane,window,windows +10:grass,grass field +11:cabinet,cabinets,wall mounted cabine +12:sidewalk,pavement +13:person,child,girl,boy,woman,man,people,children,girls,boys,women,men +14:earth,ground +15:door,double door,doors +16:table,tables,tablecloth +17:mountain,mount,mountains +18:plant,flora,plant life,plants,bushes +19:curtain,drape,drapery,mantle,pall +20:chair,chairs +21:car,automobile,cars +22:water +23:painting,picture,paintings,pictures,wallart,framed canvas 
+24:sofa,couch,sofas,couches +25:shelf,shelves +26:house exterior +27:sea,ocean +28:mirror,mirrors +29:rug,carpet,carpeting +30:field +31:armchair,armchairs +32:seat,seats +33:fence,fencing +34:desk,desks +35:rock,stone,rocks,stones +36:wardrobe,closet,press,wardrobes,closets +37:lamp,lamps +38:bathtub,bathing tub,bath,tub +39:railing,rail +40:cushion,cushions +41:pedestal +42:box,boxes +43:column,pillar +44:signboard,sign,signboards,signs +45:chest of drawers,chest,bureau,dresser +46:counter +47:sand +48:sink +49:skyscraper,skyscrapers +50:fireplace,hearth,open fireplace +51:refrigerator,icebox +52:grandstand,covered stand +53:path +54:stairs,steps +55:runway +56:case,display case,showcase,vitrine +57:pool table,billiard table,snooker table +58:pillow,pillows +59:screen door,shower door +60:stairway,staircase +61:river +62:bridge,span +63:bookcase +64:window screen,door screen +65:coffee table,cocktail table +66:toilet,commode,crapper,potty +67:flower,flowers +68:book,books +69:hill +70:bench,benches +71:countertop,counter top,worktop +72:stove,kitchen stove,kitchen range,kitchen range,cooking stove +73:palm tree,palm trees +74:kitchen island +75:computer,computing machine,computing device,data processor,electronic computer,information processing system +76:swivel chair +77:boat +78:bar +79:arcade machine,arcade machines +80:hovel,hut,hutch,shack,shanty +81:bus,autobus,double-decker,jitney,motorbus,motorcoach,omnibus,passenger vehicle +82:towel +83:light bulb,lightbulb,bulb,incandescent lamp,electric light,electric-light bulb +84:truck,motortruck +85:tower,towers +86:chandelier,pendant,pendent +87:awning,sunshade,sunblind +88:streetlight,street lamp +89:booth,cubicle,stall,kiosk +90:television receiver,television,television set,tv,tv set +91:airplane,aeroplane,airplanes,aeroplanes +92:dirt track +93:apparel,wearing apparel,dress,clothes +94:pole +95:land,soil +96:bannister,banister,balustrade,balusters,handrail +97:escalator,moving staircase,moving stairway +98:ottoman,pouf,pouffe,puff,hassock +99:bottle,bottles,water bottle +100:buffet,sideboard +101:poster,posting,placard,notice,bill,card +102:stage +103:van +104:ship +105:fountain +106:conveyer belt,conveyor belt,conveyer,conveyor,transporter +107:canopy +108:washer,automatic washer,washing machine +109:plaything,toy,toys +110:swimming pool,swimming bath +111:stool,stools +112:barrel,cask,barrels,casks +113:basket,handbasket +114:waterfall,falls +115:tent,collapsible shelter +116:bag,bags,gift bag,paper bag +117:minibike,motorbike +118:cradle +119:oven +120:ball,balls +121:food,solid food +122:step,stair +123:tank,storage tank +124:trade name,brand name,brand,marque +125:microwave,microwave oven +126:plant pots,plant pot,flower pot,flowerpot,planter +127:animal,animate being,dog,cat,horse,cow,sheep,zebra,girraffe,bird +128:bicycle,bike +129:lake +130:dishwasher,dish washer,dishwashing machine +131:projection screen +132:blanket,cover +133:sculpture,sculptures +134:exhaust hood +135:sconce,sconce lamp,sconce light +136:vase,vases +137:traffic light,traffic signal,traffic lights +138:tray,trays +139:ashcan,trash can,garbage can,wastebin,ash bin,ash-bin,ashbin,dustbin,trash barrel,trash bin +140:ceiling fan,floor fan +141:pier,wharf,wharfage,dock +142:crt screen +143:plate,plates +144:monitor,monitoring device,monitors +145:bulletin board,notice board +146:shower +147:radiator +148:cup,cups,drinking glass,drinking glasses +149:clock +150:flag,flags \ No newline at end of file diff --git 
a/mask_adapter/data/datasets/ade20k_847_with_prompt_eng.txt b/mask_adapter/data/datasets/ade20k_847_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..b7631743521190368745270b3fd73d418040d12a --- /dev/null +++ b/mask_adapter/data/datasets/ade20k_847_with_prompt_eng.txt @@ -0,0 +1,848 @@ +0:invalid_class_id +1:wall,walls,interior wall,brick wall,stone wall +2:building,buildings,edifice,edifices +3:sky,clouds +4:tree,trees +5:road,route,street,roads,streets,routes +6:floor,flooring +7:ceiling +8:bed,beds +9:sidewalk,pavement +10:earth,ground +11:cabinet,cabinets,wall mounted cabine +12:person,child,girl,boy,woman,man,people,children,girls,boys,women,men +13:grass,grass field +14:windowpane,window,windows +15:car,automobile,cars +16:mountain,mount,mountains +17:plant,flora,plant life,plants,bushes +18:table,tables,tablecloth +19:chair,chairs +20:curtain,drape,drapery,mantle,pall +21:door,double door,doors +22:sofa,couch,sofas,couches +23:sea,ocean +24:painting,picture,paintings,pictures,wallart,framed canvas +25:water +26:mirror,mirrors +27:house exterior +28:rug,carpet,carpeting +29:shelf,shelves +30:armchair,armchairs +31:fence,fencing +32:field +33:lamp,lamps +34:rock,stone,rocks,stones +35:seat,seats +36:river +37:desk,desks +38:bathtub,bathing tub,bath,tub +39:railing,rail +40:signboard,sign,signboards,signs +41:cushion,cushions +42:path +43:work surface +44:stairs,steps +45:column,pillar +46:sink +47:wardrobe,closet,press,wardrobes,closets +48:snow +49:refrigerator,icebox +50:pedestal +51:bridge,span +52:blind +53:runway +54:cliff,drop,drop-off +55:sand +56:fireplace,hearth,open fireplace +57:pillow,pillows +58:screen door,shower door +59:toilet,commode,crapper,potty +60:skyscraper,skyscrapers +61:grandstand,covered stand +62:box,boxes +63:pool table,billiard table,snooker table +64:palm tree,palm trees +65:double door +66:coffee table,cocktail table +67:counter +68:countertop,counter top,worktop +69:chest of drawers,chest,bureau,dresser +70:kitchen island +71:boat +72:waterfall,falls +73:stove,kitchen stove,kitchen range,kitchen range,cooking stove +74:flower,flowers +75:bookcase +76:controls +77:book,books +78:stairway,staircase +79:streetlight,street lamp +80:computer,computing machine,computing device,data processor,electronic computer,information processing system +81:bus,autobus,double-decker,jitney,motorbus,motorcoach,omnibus,passenger vehicle +82:swivel chair +83:light,light source +84:bench,benches +85:case,display case,showcase,vitrine +86:towel +87:fountain +88:embankment +89:television receiver,television,television set,tv,tv set +90:van +91:hill +92:awning,sunshade,sunblind +93:poster,posting,placard,notice,bill,card +94:truck,motortruck +95:airplane,aeroplane,airplanes,aeroplanes +96:pole +97:tower,towers +98:court +99:ball,balls +100:aircraft carrier,carrier,flattop,attack aircraft carrier +101:buffet,sideboard +102:hovel,hut,hutch,shack,shanty +103:apparel,wearing apparel,dress,clothes +104:minibike,motorbike +105:animal,animate being,dog,cat,horse,cow,sheep,zebra,giraffe,bird +106:chandelier,pendant,pendent +107:step,stair +108:booth,cubicle,stall,kiosk +109:bicycle,bike +110:doorframe,doorcase +111:sconce,sconce lamp,sconce light +112:pond +113:trade name,brand name +114:bannister,banister,balustrade,balusters,handrail +115:bag,bags,gift bag,paper bag +116:traffic light,traffic signal,traffic lights +117:gazebo +118:escalator,moving staircase,moving stairway +119:land,soil +120:board,plank +121:arcade machine,arcade machines 
+122:eiderdown,duvet,continental quilt +123:bar +124:stall,stand,sales booth +125:playground +126:ship +127:ottoman,pouf,pouffe,puff,hassock +128:ashcan,trash can,garbage can,wastebin,ash bin,ash-bin,ashbin,dustbin,trash barrel,trash bin +129:bottle,bottles,water bottle +130:cradle +131:pot,flowerpot +132:conveyer belt,conveyor belt,conveyer,conveyor,transporter +133:train,railroad train +134:stool,stools +135:lake +136:tank,storage tank +137:ice,water ice +138:basket,handbasket +139:manhole +140:tent,collapsible shelter +141:canopy +142:microwave,microwave oven +143:barrel,cask,barrels,casks +144:dirt track +145:beam +146:dishwasher,dish washer,dishwashing machine +147:plate,plates +148:crt screen +149:ruins +150:washer,automatic washer,washing machine +151:blanket,cover +152:plaything,toy,toys +153:food,solid food +154:projection screen +155:oven +156:stage +157:beacon,lighthouse,beacon light,pharos +158:umbrella +159:sculpture,sculptures +160:aqueduct +161:container +162:scaffolding,staging +163:exhaust hood +164:curb,curbing,kerb +165:roller coaster +166:horse,equus caballus +167:catwalk +168:glass,drinking glass +169:vase,vases +170:central reservation +171:carousel +172:radiator +173:closet +174:machine +175:pier,wharf,wharfage,dock +176:ceiling fan,floor fan +177:inflatable bounce game +178:pitch +179:paper +180:arcade,colonnade +181:hot tub +182:helicopter +183:tray,trays +184:partition,divider +185:vineyard +186:bowl +187:bullring +188:flag,flags +189:pot +190:footbridge,overcrossing,pedestrian bridge +191:shower +192:bag,traveling bag,travelling bag,grip,suitcase +193:bulletin board,notice board +194:confessional booth +195:trunk,tree trunk,bole +196:forest +197:elevator door +198:laptop,laptop computer +199:instrument panel +200:bucket,pail +201:tapestry,tapis +202:platform +203:jacket +204:gate +205:monitor,monitoring device,monitors +206:telephone booth,phone booth,call box,telephone box,telephone kiosk +207:spotlight,spot +208:ring +209:control panel +210:blackboard,chalkboard +211:air conditioner,air conditioning +212:chest +213:clock +214:sand dune +215:pipe,pipage,piping +216:vault +217:table football +218:cannon +219:swimming pool,swimming bath +220:fluorescent,fluorescent fixture +221:statue +222:loudspeaker,speaker,speaker unit,loudspeaker system,speaker system +223:exhibitor +224:ladder +225:carport +226:dam +227:pulpit +228:skylight,fanlight +229:water tower +230:grill,grille,grillwork +231:display board +232:pane,pane of glass,window glass +233:rubbish,trash,scrap +234:ice rink +235:fruit +236:patio +237:vending machine +238:telephone,phone,telephone set +239:net +240:backpack,back pack,knapsack,packsack,rucksack,haversack +241:jar +242:track +243:magazine +244:shutter +245:roof +246:banner,streamer +247:landfill +248:post +249:altarpiece,reredos +250:hat,chapeau,lid +251:arch,archway +252:table game +253:bag,handbag,pocketbook,purse +254:document,written document,papers +255:dome +256:pier +257:shanties +258:forecourt +259:crane +260:dog,domestic dog,canis familiaris +261:piano,pianoforte,forte-piano +262:drawing +263:cabin +264:ad,advertisement,advertizement,advertising,advertizing,advert +265:amphitheater,amphitheatre,coliseum +266:monument +267:henhouse +268:cockpit +269:heater,warmer +270:windmill,aerogenerator,wind generator +271:pool +272:elevator,lift +273:decoration,ornament,ornamentation +274:labyrinth +275:text,textual matter +276:printer +277:mezzanine,first balcony +278:mattress +279:straw +280:stalls +281:patio,terrace +282:billboard,hoarding +283:bus 
stop +284:trouser,pant +285:console table,console +286:rack +287:notebook +288:shrine +289:pantry +290:cart +291:steam shovel +292:porch +293:postbox,mailbox,letter box +294:figurine,statuette +295:recycling bin +296:folding screen +297:telescope +298:deck chair,beach chair +299:kennel +300:coffee maker +301:altar,communion table,lord's table +302:fish +303:easel +304:artificial golf green +305:iceberg +306:candlestick,candle holder +307:shower stall,shower bath +308:television stand +309:wall socket,wall plug,electric outlet,electrical outlet,outlet,electric receptacle +310:skeleton +311:grand piano,grand +312:candy,confect +313:grille door +314:pedestal,plinth,footstall +315:jersey,t-shirt,tee shirt +316:shoe +317:gravestone,headstone,tombstone +318:shanty +319:structure +320:rocking chair,rocker +321:bird +322:place mat +323:tomb +324:big top +325:gas pump,gasoline pump,petrol pump,island dispenser +326:lockers +327:cage +328:finger +329:bleachers +330:ferris wheel +331:hairdresser chair +332:mat +333:stands +334:aquarium,fish tank,marine museum +335:streetcar,tram,tramcar,trolley,trolley car +336:napkin,table napkin,serviette +337:dummy +338:booklet,brochure,folder,leaflet,pamphlet +339:sand trap +340:shop,store +341:table cloth +342:service station +343:coffin +344:drawer +345:cages +346:slot machine,coin machine +347:balcony +348:volleyball court +349:table tennis +350:control table +351:shirt +352:merchandise,ware,product +353:railway +354:parterre +355:chimney +356:can,tin,tin can +357:tanks +358:fabric,cloth,material,textile +359:alga,algae +360:system +361:map +362:greenhouse +363:mug +364:barbecue +365:trailer +366:toilet tissue,toilet paper,bathroom tissue +367:organ +368:dishrag,dishcloth +369:island +370:keyboard +371:trench +372:basket,basketball hoop,hoop +373:steering wheel,wheel +374:pitcher,ewer +375:goal +376:bread,breadstuff,staff of life +377:beds +378:wood +379:file cabinet +380:newspaper,paper +381:motorboat +382:rope +383:guitar +384:rubble +385:scarf +386:barrels +387:cap +388:leaves +389:control tower +390:dashboard +391:bandstand +392:lectern +393:switch,electric switch,electrical switch +394:baseboard,mopboard,skirting board +395:shower room +396:smoke +397:faucet,spigot +398:bulldozer +399:saucepan +400:shops +401:meter +402:crevasse +403:gear +404:candelabrum,candelabra +405:sofa bed +406:tunnel +407:pallet +408:wire,conducting wire +409:kettle,boiler +410:bidet +411:baby buggy,baby carriage,carriage,perambulator,pram,stroller,go-cart,pushchair,pusher +412:music stand +413:pipe,tube +414:cup,cups,drinking glass,drinking glasses +415:parking meter +416:ice hockey rink +417:shelter +418:weeds +419:temple +420:patty,cake +421:ski slope +422:panel +423:wallet +424:wheel +425:towel rack,towel horse +426:roundabout +427:canister,cannister,tin +428:rod +429:soap dispenser +430:bell +431:canvas +432:box office,ticket office,ticket booth +433:teacup +434:trellis +435:workbench +436:valley,vale +437:toaster +438:knife +439:podium +440:ramp +441:tumble dryer +442:fireplug,fire hydrant,plug +443:gym shoe,sneaker,tennis shoe +444:lab bench +445:equipment +446:rocky formation +447:plastic +448:calendar +449:caravan +450:check-in-desk +451:ticket counter +452:brush +453:mill +454:covered bridge +455:bowling alley +456:hanger +457:excavator +458:trestle +459:revolving door +460:blast furnace +461:scale,weighing machine +462:projector +463:soap +464:locker +465:tractor +466:stretcher +467:frame +468:grating +469:alembic +470:candle,taper,wax light +471:barrier +472:cardboard 
+473:cave +474:puddle +475:tarp +476:price tag +477:watchtower +478:meters +479:light bulb,bulb,bulbs +480:tracks +481:hair dryer +482:skirt +483:viaduct +484:paper towel +485:coat +486:sheet +487:fire extinguisher,extinguisher,asphyxiator +488:water wheel +489:pottery,clayware +490:magazine rack +491:teapot +492:microphone,mike +493:support +494:forklift +495:canyon +496:cash register,register +497:leaf,leafage,foliage +498:remote control,remote +499:soap dish +500:windshield,windscreen +501:cat +502:cue,cue stick,pool cue,pool stick +503:vent,venthole,vent-hole,blowhole +504:videos +505:shovel +506:eaves +507:antenna,aerial,transmitting aerial +508:shipyard +509:hen,biddy +510:traffic cone +511:washing machines +512:truck crane +513:cds +514:niche +515:scoreboard +516:briefcase +517:boot +518:sweater,jumper +519:hay +520:pack +521:bottle rack +522:glacier +523:pergola +524:building materials +525:television camera +526:first floor +527:rifle +528:tennis table +529:stadium +530:safety belt +531:cover +532:dish rack +533:synthesizer +534:pumpkin +535:gutter +536:fruit stand +537:ice floe,floe +538:handle,grip,handgrip,hold +539:wheelchair +540:mousepad,mouse mat +541:diploma +542:fairground ride +543:radio +544:hotplate +545:junk +546:wheelbarrow +547:stream +548:toll plaza +549:punching bag +550:trough +551:throne +552:chair desk +553:weighbridge +554:extractor fan +555:hanging clothes +556:dish,dish aerial,dish antenna,saucer +557:alarm clock,alarm +558:ski lift +559:chain +560:garage +561:mechanical shovel +562:wine rack +563:tramway +564:treadmill +565:menu +566:block +567:well +568:witness stand +569:branch +570:duck +571:casserole +572:frying pan +573:desk organizer +574:mast +575:spectacles,specs,eyeglasses,glasses +576:service elevator +577:dollhouse +578:hammock +579:clothes hanging +580:photocopier +581:notepad +582:golf cart +583:footpath +584:cross +585:baptismal font +586:boiler +587:skip +588:rotisserie +589:tables +590:water mill +591:helmet +592:cover curtain +593:brick +594:table runner +595:ashtray +596:street box +597:stick +598:hangers +599:cells +600:urinal +601:centerpiece +602:portable fridge +603:dvds +604:golf club +605:skirting board +606:water cooler +607:clipboard +608:camera,photographic camera +609:pigeonhole +610:chips +611:food processor +612:post box +613:lid +614:drum +615:blender +616:cave entrance +617:dental chair +618:obelisk +619:canoe +620:mobile +621:monitors +622:pool ball +623:cue rack +624:baggage carts +625:shore +626:fork +627:paper filer +628:bicycle rack +629:coat rack +630:garland +631:sports bag +632:fish tank +633:towel dispenser +634:carriage +635:brochure +636:plaque +637:stringer +638:iron +639:spoon +640:flag pole +641:toilet brush +642:book stand +643:water faucet,water tap,tap,hydrant +644:ticket office +645:broom +646:dvd +647:ice bucket +648:carapace,shell,cuticle,shield +649:tureen +650:folders +651:chess +652:root +653:sewing machine +654:model +655:pen +656:violin +657:sweatshirt +658:recycling materials +659:mitten +660:chopping board,cutting board +661:mask +662:log +663:mouse,computer mouse +664:grill +665:hole +666:target +667:trash bag +668:chalk +669:sticks +670:balloon +671:score +672:hair spray +673:roll +674:runner +675:engine +676:inflatable glove +677:games +678:pallets +679:baskets +680:coop +681:dvd player +682:rocking horse +683:buckets +684:bread rolls +685:shawl +686:watering can +687:spotlights +688:post-it +689:bowls +690:security camera +691:runner cloth +692:lock +693:alarm,warning device,alarm system 
+694:side +695:roulette +696:bone +697:cutlery +698:pool balls +699:wheels +700:spice rack +701:plant pots,plant pot,flower pot,flowerpot,planter +702:towel ring +703:bread box +704:video +705:funfair +706:breads +707:tripod +708:ironing board +709:skimmer +710:hollow +711:scratching post +712:tricycle +713:file box +714:mountain pass +715:tombstones +716:cooker +717:card game,cards +718:golf bag +719:towel paper +720:chaise lounge +721:sun +722:toilet paper holder +723:rake +724:key +725:umbrella stand +726:dartboard +727:transformer +728:fireplace utensils +729:sweatshirts +730:cellular telephone,cellular phone,cellphone,cell,mobile phone +731:tallboy +732:stapler +733:sauna +734:test tube +735:palette +736:shopping carts +737:tools +738:push button,push,button +739:star +740:roof rack +741:barbed wire +742:spray +743:ear +744:sponge +745:racket +746:tins +747:eyeglasses +748:file +749:scarfs +750:sugar bowl +751:flip flop +752:headstones +753:laptop bag +754:leash +755:climbing frame +756:suit hanger +757:floor spotlight +758:plate rack +759:sewer +760:hard drive +761:sprinkler +762:tools box +763:necklace +764:bulbs +765:steel industry +766:club +767:jack +768:door bars +769:control panel,instrument panel,control board,board,panel +770:hairbrush +771:napkin holder +772:office +773:smoke detector +774:utensils +775:apron +776:scissors +777:terminal +778:grinder +779:entry phone +780:newspaper stand +781:pepper shaker +782:onions +783:central processing unit,cpu,central processor,processor,mainframe +784:tape +785:bat +786:coaster +787:calculator +788:potatoes +789:luggage rack +790:salt +791:street number +792:viewpoint +793:sword +794:cd +795:rowing machine +796:plug +797:andiron,firedog,dog,dog-iron +798:pepper +799:tongs +800:bonfire +801:dog dish +802:belt +803:dumbbells +804:videocassette recorder,vcr +805:hook +806:envelopes +807:shower faucet +808:watch +809:padlock +810:swimming pool ladder +811:spanners +812:gravy boat +813:notice board +814:trash bags +815:fire alarm +816:ladle +817:stethoscope +818:rocket +819:funnel +820:bowling pins +821:valve +822:thermometer +823:cups +824:spice jar +825:night light +826:soaps +827:games table +828:slotted spoon +829:reel +830:scourer +831:sleeping robe +832:desk mat +833:dumbbell +834:hammer +835:tie +836:typewriter +837:shaker +838:cheese dish +839:sea star +840:racquet +841:butane gas cylinder +842:paper weight +843:shaving brush +844:sunglasses +845:gear shift +846:towel rail +847:adding machine,totalizer,totaliser \ No newline at end of file diff --git a/mask_adapter/data/datasets/cityscapes_with_prompt_eng.txt b/mask_adapter/data/datasets/cityscapes_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..65fd30673d2008b09ea8dd5ea0be887506c422c4 --- /dev/null +++ b/mask_adapter/data/datasets/cityscapes_with_prompt_eng.txt @@ -0,0 +1,19 @@ +0:road,railroad +1:sidewalk,pavement +2:building,buildings,edifice,edifices,house,ceiling +3:wall,walls,brick wall,stone wall,tile wall,wood wall +4:fence,fences +5:pole,poles +6:traffic light,traffic lights +7:traffic sign,stop sign +8:vegetation,tree,trees,palm tree,bushes +9:terrain,river,sand,sea,snow,water,mountain,grass,dirt,rock +10:sky,clouds +11:person +12:rider +13:car,cars +14:truck,trucks +15:bus,buses +16:train,trains,locomotive,locomotives,freight train +17:motorcycle,motorcycles +18:bicycle,bicycles,bike,bikes \ No newline at end of file diff --git a/mask_adapter/data/datasets/coco_panoptic_with_prompt_eng.txt 
b/mask_adapter/data/datasets/coco_panoptic_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..76dc9729374dff8a314638c7a8e0ed0707c78328 --- /dev/null +++ b/mask_adapter/data/datasets/coco_panoptic_with_prompt_eng.txt @@ -0,0 +1,201 @@ +0:invalid_class_id +1:person,child,girl,boy,woman,man,people,children,girls,boys,women,men,lady,guy,ladies,guys,clothes +2:bicycle,bicycles,bike,bikes +3:car,cars +4:motorcycle,motorcycles +5:airplane,airplanes +6:bus,buses +7:train,trains,locomotive,locomotives,freight train +8:truck,trucks +9:boat,boats +10:traffic light +11:fire hydrant +12:invalid_class_id +13:stop sign +14:parking meter +15:bench,benches +16:bird,birds +17:cat,cats,kitties,kitty +18:dog,dogs,puppy,puppies +19:horse,horses,foal +20:sheep +21:cow,cows,calf +22:elephant,elephants +23:bear,bears +24:zebra,zebras +25:giraffe,giraffes +26:invalid_class_id +27:backpack,backpacks +28:umbrella,umbrellas +29:invalid_class_id +30:invalid_class_id +31:handbag,handbags +32:tie +33:suitcase,suitcases +34:frisbee +35:skis +36:snowboard +37:sports ball +38:kite,kites +39:baseball bat +40:baseball glove +41:skateboard +42:surfboard +43:tennis racket +44:bottle,bottles,water bottle +45:invalid_class_id +46:wine glass,wine glasses,wineglass +47:cup,cups,water cup,water glass +48:fork,forks +49:knife,knives +50:spoon,spoons +51:bowl,bowls +52:banana,bananas +53:apple,apples,apple fruit +54:sandwich,sandwiches +55:orange fruit +56:broccoli +57:carrot,carrots +58:hot dog +59:pizza +60:donut,donuts +61:cake,cakes +62:chair,chairs +63:couch,sofa,sofas +64:potted plant,potted plants,pottedplant,pottedplants,planter,planters +65:bed,beds +66:invalid_class_id +67:dining table,dining tables,diningtable,diningtables,plate,plates,diningtable tablecloth +68:invalid_class_id +69:invalid_class_id +70:toilet +71:invalid_class_id +72:tv +73:laptop +74:mouse +75:tv remote,remote control +76:keyboard +77:cell phone,mobile +78:microwave +79:oven,ovens +80:toaster +81:sink,sinks +82:refrigerator,fridge +83:invalid_class_id +84:book,books +85:clock +86:vase,vases +87:scissor,scissors +88:teddy bear,teddy bears +89:hair drier +90:toothbrush,toothbrushes +91:invalid_class_id +92:banner,banners +93:blanket,blankets +94:invalid_class_id +95:bridge +96:invalid_class_id +97:invalid_class_id +98:invalid_class_id +99:invalid_class_id +100:cardboard +101:invalid_class_id +102:invalid_class_id +103:invalid_class_id +104:invalid_class_id +105:invalid_class_id +106:invalid_class_id +107:counter +108:invalid_class_id +109:curtain,curtains +110:invalid_class_id +111:invalid_class_id +112:door,doors +113:invalid_class_id +114:invalid_class_id +115:invalid_class_id +116:invalid_class_id +117:invalid_class_id +118:wood floor +119:flower,flowers +120:invalid_class_id +121:invalid_class_id +122:fruit,fruits +123:invalid_class_id +124:invalid_class_id +125:gravel +126:invalid_class_id +127:invalid_class_id +128:house +129:invalid_class_id +130:lamp,bulb,lamps,bulbs +131:invalid_class_id +132:invalid_class_id +133:mirror +134:invalid_class_id +135:invalid_class_id +136:invalid_class_id +137:invalid_class_id +138:tennis net +139:invalid_class_id +140:invalid_class_id +141:pillow,pillows +142:invalid_class_id +143:invalid_class_id +144:platform +145:playingfield,tennis court,baseball field,soccer field,tennis field +146:invalid_class_id +147:railroad +148:river +149:road +150:invalid_class_id +151:roof +152:invalid_class_id +153:invalid_class_id +154:sand +155:sea,sea wave,wave,waves +156:shelf 
+157:invalid_class_id +158:invalid_class_id +159:snow +160:invalid_class_id +161:stairs +162:invalid_class_id +163:invalid_class_id +164:invalid_class_id +165:invalid_class_id +166:tent +167:invalid_class_id +168:towel +169:invalid_class_id +170:invalid_class_id +171:brick wall +172:invalid_class_id +173:invalid_class_id +174:invalid_class_id +175:stone wall +176:tile wall +177:wood wall +178:water +179:invalid_class_id +180:window blind +181:window +182:invalid_class_id +183:invalid_class_id +184:tree,trees,palm tree,bushes +185:fence,fences +186:ceiling +187:sky,clouds +188:cabinet,cabinets +189:table +190:floor,flooring,tile floor +191:pavement +192:mountain,mountains +193:grass +194:dirt +195:paper +196:food +197:building,buildings +198:rock +199:wall,walls +200:rug \ No newline at end of file diff --git a/mask_adapter/data/datasets/coco_stuff_with_prompt_eng.txt b/mask_adapter/data/datasets/coco_stuff_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..3ad453c5f577c05514d2aa2d71c1871778263971 --- /dev/null +++ b/mask_adapter/data/datasets/coco_stuff_with_prompt_eng.txt @@ -0,0 +1,183 @@ +0:invalid_class_id +1:person,child,girl,boy,woman,man,people,children,girls,boys,women,men,lady,guy,ladies,guys +2:bicycle,bicycles,bike,bikes +3:car,cars +4:motorcycle,motorcycles +5:airplane,airplanes +6:bus,buses +7:train,trains,locomotive,locomotives,freight train +8:truck,trucks +9:boat,boats +10:traffic light +11:fire hydrant +12:invalid_class_id +13:stop sign +14:parking meter +15:bench,benches +16:bird,birds +17:cat,cats,kitties,kitty +18:dog,dogs,puppy,puppies +19:horse,horses,foal +20:sheep +21:cow,cows,calf +22:elephant,elephants +23:bear,bears +24:zebra,zebras +25:giraffe,giraffes +26:invalid_class_id +27:backpack,backpacks +28:umbrella,umbrellas +29:invalid_class_id +30:invalid_class_id +31:handbag,handbags +32:tie +33:suitcase,suitcases +34:frisbee +35:skis +36:snowboard +37:sports ball +38:kite,kites +39:baseball bat +40:baseball glove +41:skateboard +42:surfboard +43:tennis racket +44:bottle,bottles,water bottle +45:invalid_class_id +46:wine glass,wine glasses,wineglass +47:cup,cups,water cup,water glass +48:fork,forks +49:knife,knives +50:spoon,spoons +51:bowl,bowls +52:banana,bananas +53:apple,apples,apple fruit +54:sandwich,sandwiches +55:orange,oranges,orange fruit +56:broccoli +57:carrot,carrots +58:hot dog +59:pizza +60:donut,donuts +61:cake,cakes +62:chair,chairs +63:couch,sofa,sofas +64:potted plant,potted plants,pottedplant,pottedplants,planter,planters +65:bed,beds +66:invalid_class_id +67:dining table,dining tables,diningtable,diningtables,plate,plates,diningtable tablecloth +68:invalid_class_id +69:invalid_class_id +70:toilet +71:invalid_class_id +72:tv +73:laptop +74:mouse +75:remote,tv remote,remote control +76:keyboard +77:cell phone,mobile +78:microwave +79:oven,ovens +80:toaster +81:sink,sinks +82:refrigerator,fridge +83:invalid_class_id +84:book,books +85:clock +86:vase,vases +87:scissors,scissor +88:teddy bear,teddy bears +89:hair drier +90:toothbrush,toothbrushes +91:invalid_class_id +92:banner,banners +93:blanket,blankets +94:branch +95:bridge +96:building,buildings +97:bush,bushes +98:cabinet,cabinets +99:cage,cages +100:cardboard +101:carpet,carpets +102:ceiling-other,ceiling +103:ceiling-tile,ceiling tile +104:cloth +105:clothes +106:clouds +107:counter +108:cupboard,cupboards +109:curtain,curtains +110:desk-stuff,desk,desks +111:dirt +112:door-stuff,door,doors +113:fence,fences +114:floor-marble,marble floor,floor marble 
+115:floor-other,floor +116:floor-stone,stone floor,floor stone +117:floor-tile,tile floor,floor tile +118:floor-wood,wood floor,floor wood +119:flower,flowers +120:fog +121:food-other,food +122:fruit,fruits +123:furniture-other,furniture +124:grass +125:gravel +126:ground-other,ground +127:hill +128:house +129:leaves +130:light +131:mat +132:metal +133:mirror-stuff,mirror +134:moss +135:mountain,mountains +136:mud +137:napkin +138:net +139:paper +140:pavement +141:pillow,pillows +142:plant-other +143:plastic +144:platform +145:playingfield,tennis court,baseball field,soccer field,tennis field +146:railing +147:railroad +148:river +149:road +150:rock +151:roof +152:rug +153:salad +154:sand +155:sea,sea wave,wave,waves +156:shelf +157:sky-other,sky +158:skyscraper +159:snow +160:solid-other,solid +161:stairs +162:stone +163:straw +164:structural-other,structural +165:table +166:tent +167:textile-other,textile +168:towel +169:tree,trees,palm tree +170:vegetable +171:wall-brick,brick wall,wall brick +172:wall-concrete,concrete wall,wall concrete +173:wall-other,wall +174:wall-panel,wall panel,panel wall +175:wall-stone,stone wall,wall stone +176:wall-tile,wall tile,tile wall +177:wall-wood,wood wall, wall wood +178:water-other,water +179:waterdrops +180:window-blind,window blind +181:window-other,window +182:wood \ No newline at end of file diff --git a/mask_adapter/data/datasets/grand_with_prompt_eng.txt b/mask_adapter/data/datasets/grand_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..248010c41232e6927ca3db568beae60acd9de939 --- /dev/null +++ b/mask_adapter/data/datasets/grand_with_prompt_eng.txt @@ -0,0 +1,5252 @@ +1:antenna,antennas +2:clock_tower +3:tower,towers +4:clock +5:house,houses +6:life_buoy +7:sail,sails +8:raft,rafts +9:oar +10:life_jacket +11:hat,hats +12:Boats,boat,boats +13:paddle,paddles +14:person,persons +15:rowboat,rowboats +16:sculpture,sculptures +17:castle +18:train,train_,trains +19:pipe,pipes +20:vent,vents +21:camera,cameras +22:signboard +23:lightbulb +24:poster,posters +25:bench +26:traffic_light +27:ceiling,ceilings +28:station,stations +29:city +30:plaza +31:Statue,Statues,statue,statue_,statues +32:flowerpot +33:flagpole +34:jacket,jackets +35:view,views +36:scene,scenes +37:taillight +38:Cars,car,car_,cars +39:awning,awnings +40:headlight,headlights +41:cone,cones +42:motorcycle,motorcycles +43:motor_scooter +44:streetlight,streetlights +45:Traffic,traffic +46:building,buildings +47:mall +48:cathedral +49:camel,camels +50:backpack,backpacks +51:sweatshirt +52:pigeon,pigeons +53:road +54:group +55:Mountains,mountain,mountains +56:balloon,balloons +57:gym +58:air_conditioner +59:street_sign +60:pole,poles +61:Trees,tree,trees +62:grass +63:travel +64:bicycle,bicycles +65:bike,bikes +66:Street,street,streets +67:roof,roofs +68:costume,costumes +69:parade +70:Crowds,crowd,crowds +71:event,events +72:mask,masks +73:blanket,blankets +74:sandal,sandal_,sandals +75:banner,banners +76:cap,cap_,caps +77:minivan +78:bracelet,bracelets +79:flip-flop,flip-flop_,flip-flops +80:frisbee +81:shirt,shirts +82:headscarf +83:windshield_wiper +84:telephone_pole +85:platform +86:railway,railways +87:airplane,airplanes +88:formation,formations +89:trash_can +90:jean,jeans +91:boardwalk +92:cincture +93:march +94:dress +95:flag,flags +96:trouser,trousers +97:belt +98:Shoes,shoe,shoes +99:umbrella,umbrellas +100:parking +101:book,books +102:library +103:milk_can +104:log,logs +105:bottle,bottles +106:bottle_cap +107:water_bottle +108:garbage 
+109:plastic_bag +110:plastic +111:camping +112:campsite +113:beach +114:scarf +115:shopping_bag +116:sport,sports +117:handbag,handbags +118:helmet,helmets +119:tote_bag +120:rearview_mirror +121:wheel,wheels +122:grill +123:coat,coats +124:race_car +125:vehicle,vehicles +126:sky +127:red +128:serene +129:river +130:sock +131:legging,legging_,leggings +132:shoulder_bag +133:blouse +134:short_pants +135:telephone +136:lamppost +137:strap +138:park +139:People,people +140:fence +141:tall +142:Church,church +143:scooter,scooters +144:motorbike,motorbikes +145:motorcyclist,motorcyclists +146:shop,shops +147:Rows,row,rows +148:carnival +149:water,waters +150:Protest,protest +151:sign,signs +152:insect,insects +153:night +154:mosque,mosques +155:keyboard +156:cell_phone +157:computer +158:warehouse +159:potted_plant +160:palace +161:beautiful +162:tie +163:girl,girls +164:plane,planes +165:cart,carts +166:truck,trucks +167:load +168:terminal +169:tarmac +170:Airport,airport +171:golf +172:Newspapers,newspaper,newspapers +173:headband +174:tank_top_ +175:column,columns +176:wrench +177:lanyard +178:orange,orange_,oranges +179:chair,chairs +180:hose,hoses +181:runner,runner_,runners +182:Robots,robot,robots +183:knee_pad +184:convention +185:bamboo +186:jersey,jerseys +187:toy,toys +188:glass +189:dog,dogs +190:blue +191:day,days +192:gazebo,gazebos +193:deck_chair +194:towel,towels +195:swimwear +196:goggles +197:swimsuit,swimsuits +198:swimmer,swimmers +199:pool +200:bustling +201:necklace +202:thread +203:curtain +204:bowl,bowls +205:wig +206:loom +207:Construction,construction +208:barricade,barricades +209:crate,crates +210:curb +211:office +212:dinghy +213:waterway +214:bucket,buckets +215:sunhat +216:dumpster +217:area,areas +218:bandanna +219:rickshaw,rickshaws +220:ride,rides +221:crowded +222:arena +223:ring,rings +224:billboard,billboards +225:signage +226:license_plate +227:lot,lots +228:propeller +229:rural +230:field,fields +231:urban +232:hot-air +233:hot-air_balloon +234:tag +235:scoreboard +236:baseball_cap +237:race +238:smartphone,smartphones +239:wooden +240:tram +241:Tourists,tourist,tourists +242:tarp,tarps +243:nest +244:village +245:door,doors +246:entrance,entrances +247:watch +248:spotlight,spotlights +249:polo +250:timer +251:sweat_pants +252:manhole +253:table,tables +254:giant_panda +255:refrigerator +256:Ruins,ruin,ruins +257:horse,horses +258:stamp,stamps +259:fishing_rod +260:thermos_bottle +261:coconut,coconuts +262:dirt +263:trail +264:battery +265:solar_array +266:barge,barges +267:solar +268:array +269:water_ski +270:wet +271:staircase +272:picturesque +273:body +274:steering_wheel +275:machine,machines +276:suitcase,suitcases +277:jeep,jeeps +278:mirror +279:skirt,skirts +280:lake +281:sun +282:drum,drum_,drums +283:maraca +284:scissor,scissors +285:environment +286:dining_table +287:crossbar +288:vest,vests +289:obstacle +290:cup,cups +291:hamburger +292:food,foods +293:Dixie_cup +294:beef,beef_,beefs +295:reflector +296:icecream +297:bun +298:Ice,ice +299:rink +300:painting,paintings +301:courtyard +302:land +303:large +304:jet,jets +305:breathtaking +306:bus,bus_ +307:light,lights +308:ferry +309:squirrel +310:tail +311:distance +312:ball,balls +313:doorknob +314:fun +315:monitor,monitor_,monitors +316:knob +317:vending_machine +318:woman +319:raincoat +320:segway +321:puppet +322:bird,birds +323:crucifix +324:sports_ball +325:palm +326:walkway,walkways +327:highway,highways +328:skyscraper,skyscrapers +329:button,buttons +330:tire,tires +331:snow 
+332:Protesters,protester,protesters +333:baby_buggy +334:bow,bow_,bows +335:box +336:doll,dolls +337:stroller,strollers +338:photo,photos +339:skier,skiers +340:ski,skis +341:slope,slopes +342:fishing +343:pelican,pelicans +344:canal,canals +345:radar +346:theater,theaters +347:museum +348:cat,cats +349:window,windows +350:skyline +351:island,islands +352:surfboard,surfboards +353:map +354:rabbit +355:almond +356:carton,cartons +357:cookie,cookies +358:bell,bells +359:stop_sign +360:weathervane +361:banana,bananas +362:number +363:scenery +364:cargo +365:Cows,cow,cows +366:polo_shirt +367:handle,handles +368:equipment +369:display,displays +370:show,shows +371:Monument,monument +372:hillside +373:washer +374:automatic_washer +375:van,vans +376:ice_maker +377:helicopter,helicopters +378:christmas +379:tennis +380:tray +381:stool,stools +382:dish_antenna +383:silo,silos +384:restaurant,restaurants +385:cuisine +386:scale,scale_,scales +387:market,markets +388:vegetable,vegetables +389:cowboy_hat +390:Fans,fan,fans +391:basket,baskets +392:tomato +393:carrot,carrots +394:crabmeat +395:potato +396:cauliflower +397:sausage +398:container,containers +399:tong,tongs +400:lettuce +401:dishes +402:chopstick,chopsticks +403:jar,jars +404:straw,straw_,straws +405:pot,pots +406:spoon,spoons +407:headboard +408:lampshade +409:drawer +410:bed,beds +411:vase,vases +412:pillow,pillows +413:lamp,lamps +414:chandelier,chandeliers +415:candle,candles +416:flower_arrangement +417:mattress +418:dresser +419:living +420:furniture +421:ship,ships +422:dock +423:ocean +424:float,floats +425:slide,slides +426:thrilling +427:pant,pants +428:cigarette_case +429:mast +430:apple,apples +431:buoy +432:sunglasses +433:produce +434:television_set +435:kite,kites +436:tv,tvs +437:duffel_bag +438:dishwasher_detergent +439:apron +440:duck,ducks +441:photograph +442:bow-tie +443:cruise,cruises +444:cargo_ship +445:port +446:rifle,rifles +447:cattle +448:yoke_ +449:beanie +450:splash +451:flood,floods +452:mushroom +453:stump +454:moped +455:license +456:beret +457:turban,turbans +458:wagon +459:miniature +460:Ferris_wheel +461:cab,cab_,cabs +462:trash +463:tour +464:amusement +465:wine_glass +466:wine,wines +467:outdoor,outdoors +468:star,stars +469:volleyball +470:pouch +471:stadium +472:safety +473:can,cans +474:hinge +475:glove,gloves +476:carpenter +477:lush +478:lantern,lanterns +479:gargoyle +480:variety +481:aircraft +482:fighter,fighters +483:roller_skate +484:Rollerblade +485:skateboard +486:activities +487:training +488:sled,sleds +489:winter +490:family +491:stone +492:magazine,magazines +493:booklet +494:capabilities +495:showcase +496:fighter_jet +497:cabana +498:forklift +499:line,lines +500:dark +501:vibrant +502:folding_chair +503:speaker,speaker_,speakers +504:Flowers,flower,flowers +505:intersection,intersections +506:soccer_ball +507:soccer +508:charming +509:sunset +510:calm +511:golfcart +512:eagle +513:obelisk,obelisks +514:memorial +515:pickup +516:gauge,gauges +517:bolt +518:thermometer +519:pressure +520:backdrop +521:kitchen +522:stove +523:sink,sinks +524:faucet +525:cabinet,cabinets +526:counter +527:cruise_ship +528:cloudy +529:chef,chefs +530:flap +531:spectacle,spectacles +532:skies +533:motor_vehicle +534:coffee +535:seine +536:film +537:kuala +538:crutch +539:grocery_bag +540:sunbathe +541:sand,sands +542:tank,tank_,tanks +543:cylinder,cylinders +544:tablecloth,tablecloths +545:tartan +546:coat_hanger +547:canopy +548:bazaar +549:roller +550:knee +551:pavement +552:store,stores +553:groom 
+554:wedding +555:phone,phones +556:crown +557:Christmas_tree +558:paper +559:crosswalk +560:neon +561:crane,cranes +562:landing +563:plume +564:carry +565:pepper,peppers +566:plate,plates +567:asparagus +568:shrimp +569:landscape +570:hook +571:parrot,parrots +572:marina +573:harbor +574:doormat +575:shrine,shrines +576:choker +577:women +578:padlock +579:magnet +580:reflection,reflections +581:biker,bikers +582:cyclist,cyclists +583:shawl +584:cover +585:taxi,taxis +586:ceremony +587:tiara +588:veil,veils +589:bouquet,bouquets +590:man +591:mudslide +592:rock,rocks +593:seat,seats +594:brass_plaque +595:banknote +596:pile,piles +597:sum +598:tent,tents +599:walk +600:fork +601:cotton +602:teapot +603:dessert,desserts +604:board,boards +605:suit,suit_,suits +606:buddha +607:cloud,clouds +608:fire_hydrant +609:pottery +610:mug,mugs +611:teacup +612:sunflower,sunflowers +613:cottage +614:garden,gardens +615:diaper,diapers +616:front +617:holiday +618:fountain,fountains +619:boot,boots +620:contrast +621:cannon,cannons +622:sticker +623:robe,robes +624:shark,sharks +625:seagull,seagulls +626:flock +627:armband +628:earring,earrings +629:jewelry +630:mannequin,mannequins +631:stand,stands +632:atm,atms +633:pillar,pillars +634:town +635:tugboat +636:bridge,bridges +637:microphone +638:organization +639:alligator +640:broom +641:papaya +642:candle_holder +643:close +644:ambulance,ambulances +645:spear +646:festival +647:cliff,cliffs +648:stupa,stupas +649:ladder,ladders +650:home,homes +651:necktie +652:cougar +653:blinker +654:fish,fish_,fishs +655:shopper,shoppers +656:auditorium +657:orchard +658:pickup_truck +659:junkyard +660:knocker,knocker_,knockers +661:couch +662:telescope,telescopes +663:barrel,barrels +664:mail_slot +665:strawberry +666:iphone,iphones +667:snack,snacks +668:business_card +669:ferris +670:fair +671:motorboat +672:bay +673:calculator +674:dancer,dancers +675:audience +676:weapon,weapons +677:army +678:wagon_wheel +679:shovel +680:tight,tights,tights_ +681:bunch +682:children +683:cozy +684:lone +685:airliner,airliners +686:takeoff +687:canoe,canoes +688:armchair +689:room +690:shelf +691:passenger_ship +692:desk,desks +693:cobblestone +694:skewer,skewers +695:bin +696:seafood +697:bag,bags +698:balcony +699:card,cards +700:silver +701:pair,pairs +702:drumstick +703:mitten +704:strainer +705:wok +706:quilt,quilts +707:piece,pieces +708:playground +709:shopping +710:birdhouse +711:sydney +712:bobbin +713:product,products +714:factory +715:lion,lions +716:mascot +717:handcart +718:passenger_car_ +719:decker +720:pedestrian,pedestrians +721:spire,spires +722:driveway +723:balustrade +724:duomo +725:package,packages +726:lamb-chop +727:rib,rib_,ribs +728:bottle_opener +729:Meat,meat,meats +730:picnic +731:baby +732:floor,floors +733:tractor,tractor_,tractors +734:hay +735:flannel +736:carriage,carriages +737:shield,shields +738:solemn +739:interior +740:cellphone +741:garage,garages +742:rooftop,rooftops +743:guitar,guitars +744:path +745:window_box_ +746:pad,pads +747:tennis_racket +748:whale +749:mosaic,mosaics +750:tile,tiles +751:colorful +752:dragon +753:top +754:notre +755:pinecone +756:bowler_hat +757:teddy_bear +758:figurine,figurines +759:cushion,cushions +760:bookcase +761:stall,stalls +762:clothing +763:officer,officers +764:business +765:binocular,binoculars +766:leather +767:stick,sticks +768:projectile,projectile_,projectiles +769:garden_hose +770:briefcase +771:Police,police +772:demonstration +773:tripod +774:overpass +775:motor +776:bulletproof_vest 
+777:camouflage +778:dress_suit +779:instrument,instruments +780:back,backs +781:cook +782:pan,pan_,pans +783:curry +784:sari,saris +785:desert +786:wristlet +787:team,teams +788:shack +789:hut,huts +790:gravestone,gravestones +791:postcard +792:shirtless +793:chapel +794:drop +795:fruit,fruits +796:procession +797:deity +798:worship +799:salami +800:salad +801:bridal_gown +802:liquor +803:bread +804:cucumber +805:blazer +806:flute_glass +807:platter +808:wineglass +809:broccoli +810:automobile +811:Bible,bible +812:bronze +813:elephant,elephants +814:trainer +815:two +816:alley +817:porch +818:plaid +819:parasol +820:lighthouse +821:slipper,slipper_,slippers +822:campfire +823:bonfire +824:graffiti +825:short,shorts +826:cable,cables +827:rice +828:eggplant,eggplants +829:vendor,vendors +830:clothes_hamper +831:many +832:river_boat +833:pig,pigs +834:fairy +835:wingspan +836:gadget,gadgets +837:burger +838:teddy +839:carousel +840:coin +841:parachute +842:gas +843:Soldiers,soldier,soldiers +844:uniform,uniforms +845:puffin +846:beverage,beverages +847:identity_card +848:sofa +849:alcohol +850:ashtray +851:drink,drinks +852:party +853:ostrich +854:hog +855:horned_cow +856:rearview +857:trailer,trailers +858:swim +859:location,locations +860:sweater +861:windmill,windmills +862:wind +863:steeple,steeples +864:gathering +865:walking_stick +866:palette +867:pallet,pallets +868:trunk,trunks +869:sunshade +870:yacht,yachts +871:spice,spices +872:fill +873:collection +874:tin +875:power +876:credit +877:bull,bulls +878:surrounding,surroundings +879:ikea +880:latch +881:gate,gates +882:la,las +883:snowy +884:wall,walls +885:musician,musicians +886:napkin,napkins +887:lemon,lemons +888:stirrup +889:milestone +890:spectator +891:sphere +892:laptop_computer +893:plant,plants +894:urn +895:stair,stairs +896:waterfront +897:shutter,shutters +898:trailer_truck +899:perspective +900:forest +901:step,steps +902:football,football_,footballs +903:fabric,fabrics +904:plaque,plaques +905:walking_cane +906:sidewalk +907:award,awards +908:pocket_watch +909:dashboard +910:windshield +911:rainy +912:roman +913:basketball +914:hike +915:earphone +916:wire,wires +917:bullhorn +918:rocket,rockets +919:ironing_board +920:dove +921:racecar +922:nascar +923:archery +924:laboratory +925:condiment,condiments +926:chili,chili_,chilis +927:crisp,crisp_,crisps +928:supermarket +929:step_stool +930:farmer,farmers +931:printer +932:wall_socket +933:belt_buckle +934:trade +935:green_onion +936:brussels_sprouts +937:daikon +938:celery +939:shampoo,shampoos +940:retail +941:three +942:dunkin +943:pedestal +944:bicyclist +945:barrier,barriers +946:label +947:aerosol_can +948:cleansing_agent +949:trophy +950:switch +951:circuit,circuits +952:meter,meters +953:panel,panels +954:payphone,payphones +955:beauty +956:square +957:donut,donuts +958:cape +959:pond +960:speedboat +961:calendar +962:lawn +963:pennant +964:toolbox +965:cityscape,cityscapes +966:pin,pin_,pins +967:chopping_board +968:Croissants,croissant,croissants +969:crescent +970:pastry +971:tassel +972:headdress +973:skateboarder,skateboarders +974:skate +975:remote +976:laptop,laptops +977:ladybug +978:knife +979:men +980:hair +981:ramp,ramps +982:cross +983:knight,knights +984:sword,swords +985:hot_dog +986:details +987:smoky +988:gargle +989:detergent +990:cast +991:lounge,lounges +992:bagel +993:onion,onions +994:parsley +995:drone +996:footage +997:curve,curves +998:prawn +999:textile,textiles +1000:gasmask +1001:pyramid,pyramids +1002:adobe +1003:broach 
+1004:pocket +1005:key +1006:classroom +1007:bush +1008:wetsuit,wetsuits +1009:barrette +1010:moment,moments +1011:machine_gun +1012:buffet +1013:life +1014:fleet +1015:wiper +1016:nightstand +1017:bedroom +1018:tinsel +1019:basketball_backboard +1020:ballet +1021:royal +1022:tea +1023:tape,tape_,tapes +1024:easel +1025:pathway +1026:wheelchair +1027:frying_pan +1028:water_tower +1029:blackboard +1030:bar,bars +1031:swamp +1032:trafalgar +1033:sewing_machine +1034:workbench +1035:workshop +1036:vessel +1037:fries +1038:brick,bricks +1039:alleyway +1040:bathtub +1041:sack +1042:garbage_truck +1043:air +1044:ski_parka +1045:sailboat,sailboats +1046:rose,roses +1047:concept +1048:middle +1049:picture,pictures +1050:sugarcane,sugarcane_,sugarcanes +1051:jewellery +1052:paintbrush +1053:saxophone +1054:performance +1055:ivy +1056:ottoman +1057:patio +1058:planet +1059:bandage +1060:pergola +1061:notebook,notebooks +1062:heart +1063:touch +1064:opening +1065:cowboy,cowboys +1066:track,tracks +1067:deer +1068:mural,murals +1069:coffee_maker +1070:saucer +1071:place_mat +1072:trolley +1073:rubber_band +1074:protestor +1075:pier +1076:hill,hills +1077:altar +1078:archway +1079:cellular_telephone +1080:pen +1081:background +1082:suspenders +1083:bride +1084:cufflink +1085:metal +1086:net,nets +1087:tow_truck +1088:telegraph +1089:dolphin,dolphins +1090:giraffe,giraffes +1091:lumber +1092:fort +1093:marketplace +1094:wheelbarrow +1095:barrow +1096:gardenia +1097:horse_buggy +1098:anklet +1099:rail,rails +1100:mat,mat_,mats +1101:hockey +1102:envelope +1103:camper,camper_,campers +1104:vintage +1105:outcrop +1106:water_jug +1107:bear +1108:pink +1109:thermostat +1110:cornice +1111:bulletin +1112:bakery +1113:army_tank +1114:carpet,carpets +1115:horse_carriage +1116:lightning_rod +1117:lagoon +1118:resort +1119:thrill +1120:pew,pew_,pews +1121:presentation,presentations +1122:trench +1123:oven +1124:cooking +1125:telephone_booth +1126:tricycle +1127:jam,jams +1128:peanut,peanuts +1129:memory +1130:outhouse +1131:luggage +1132:kayak,kayaks +1133:arrangement,arrangements +1134:head,heads +1135:game,games +1136:fire_extinguisher +1137:shopping_cart +1138:speed +1139:toothpick +1140:bikini +1141:art,arts +1142:Swans,swan,swans +1143:cardigan +1144:notepad +1145:morning +1146:kimono,kimonos +1147:plantation +1148:structure,structures +1149:cherry +1150:sense +1151:terrace +1152:wreath +1153:footwear +1154:cake,cakes +1155:buy +1156:warrior,warriors +1157:pumpkin,pumpkins +1158:indoor +1159:mother +1160:icon,icons +1161:jumpsuit +1162:sleeping_bag +1163:yoga +1164:peaceful +1165:intricate +1166:colosseum +1167:glow +1168:deck +1169:cooler,cooler_,coolers +1170:wet_suit +1171:camp +1172:neckerchief +1173:baseball +1174:remembrance +1175:saddlebag +1176:turtleneck_ +1177:cop +1178:postbox,postbox_,postboxs +1179:mailbox,mailbox_,mailboxs +1180:podium +1181:object,objects +1182:lotus +1183:pizza,pizzas +1184:hammer +1185:cigarette,cigarettes +1186:litter +1187:messy +1188:hallway +1189:camera_lens +1190:tablet,tablets +1191:wallet +1192:drill +1193:skull +1194:passport,passports +1195:pagoda,pagodas +1196:armor +1197:model,models +1198:destination,destinations +1199:meal,meals +1200:purple +1201:aquarium,aquariums +1202:atv,atvs +1203:muddy +1204:exterior,exteriors +1205:sheep +1206:pinwheel +1207:skullcap +1208:autumn +1209:customer,customers +1210:item,items +1211:soda +1212:wicker +1213:sit +1214:gun +1215:harmonium +1216:organ +1217:brassiere +1218:waist +1219:corset +1220:gondola,gondola_,gondolas 
+1221:globe +1222:zoo +1223:dance +1224:sportswear +1225:keycard +1226:coverall +1227:Workers,worker,workers +1228:saddle_blanket +1229:surface,surfaces +1230:coke +1231:court,courts +1232:cardboard +1233:Catch,catch +1234:coffee_table +1235:gravy_boat +1236:seabird +1237:project,projects +1238:gown,gowns +1239:diploma,diplomas +1240:rescue +1241:horseback +1242:mesh +1243:tapestry +1244:frescoes +1245:rally +1246:clipboard +1247:subway +1248:seashell +1249:blueberry +1250:muffin +1251:raspberry +1252:opera +1253:mix +1254:exhibition +1255:exhibit,exhibits +1256:patch +1257:toothbrush +1258:salon +1259:hospital +1260:grape,grapes +1261:design,designs +1262:duct_tape +1263:couple +1264:bagpipe +1265:dress_hat +1266:cornet +1267:toaster +1268:television,televisions +1269:sombrero +1270:atlantis +1271:pistol +1272:troop +1273:stylus +1274:baseball_bat +1275:black +1276:smoker +1277:cantaloup +1278:cantaloupe +1279:melon,melons +1280:power_shovel +1281:bulldozer +1282:pub +1283:snowman +1284:owl +1285:sandy +1286:jockey,jockeys +1287:lorry +1288:intriguing +1289:kamps +1290:auto +1291:interesting +1292:reindeer +1293:goat,goats +1294:animal,animals +1295:herd +1296:visor +1297:marathon +1298:Apartment,apartment,apartments +1299:bank,banks +1300:karaoke +1301:speedometer +1302:stop +1303:medicine +1304:several +1305:poncho +1306:hedge,hedges +1307:bowling_ball +1308:parcel +1309:doughnut,doughnuts +1310:convenience +1311:goods +1312:fresco +1313:estate +1314:trophy_cup +1315:fire_alarm +1316:easter +1317:egg,eggs +1318:scaffolding +1319:performer +1320:ski_boot +1321:handgun +1322:dozer +1323:excavator,excavators +1324:lexus +1325:pie,pies +1326:job +1327:brown +1328:mouse,mouse_,mouses +1329:projector +1330:railing,railings +1331:rocky +1332:concert +1333:gull +1334:options +1335:grocery +1336:artwork +1337:escalator,escalators +1338:mansion +1339:luxury +1340:python +1341:bonnet +1342:frame +1343:evening +1344:portrait +1345:rio +1346:round +1347:money +1348:grassy +1349:stirrer +1350:caldron +1351:setting +1352:grille +1353:celebration +1354:logo,logos +1355:dome,domes +1356:snowmobile,snowmobiles +1357:kettle +1358:mallet +1359:rag +1360:koala +1361:closet +1362:canister +1363:remote_control +1364:hassock +1365:hotel,hotels +1366:ledge +1367:mop +1368:t +1369:doorway,doorways +1370:webcam +1371:johnsons +1372:temple,temples +1373:shipping +1374:postage +1375:image,images +1376:ground,grounds +1377:birdbath +1378:toilet +1379:arrow,arrows +1380:dish +1381:fire,fires +1382:otherwise +1383:attire +1384:lollipop +1385:candy +1386:peach +1387:plum,plums +1388:apricot +1389:beard +1390:bead,beads +1391:cosmetic,cosmetics +1392:cube,cubes +1393:princess +1394:underwear +1395:wrestling +1396:lab_coat +1397:chinaware +1398:appliance,appliances +1399:hardwood +1400:space,spaces +1401:cane +1402:wrought +1403:vending +1404:school +1405:nature +1406:yoghurt +1407:dispenser +1408:mound,mound_,mounds +1409:singapore +1410:stack,stacks +1411:circle +1412:milk +1413:cock +1414:sleepwear +1415:heartwarming +1416:boy,boys +1417:jewel +1418:boxing +1419:theme +1420:currency +1421:glasses +1422:iPod,ipod +1423:journal +1424:workspace +1425:ram,ram_,rams +1426:saddle,saddle_,saddles +1427:noseband_ +1428:equestrian +1429:sunny +1430:passenger,passengers +1431:ancient +1432:festive +1433:architecture +1434:radiator +1435:lesson +1436:computer_keyboard +1437:measuring_stick +1438:marker +1439:pencil +1440:pop,pop_,pops +1441:receipt +1442:pineapple,pineapples +1443:toothpaste +1444:scraper +1445:surfer,surfers 
+1446:offerings +1447:turbine,turbines +1448:material,materials +1449:parking_meter +1450:blinder,blinder_,blinders +1451:Aisles,aisle,aisles +1452:stepladder +1453:bathroom +1454:off +1455:warning +1456:promenade +1457:tux +1458:tool,tools +1459:wine_bucket +1460:Chicken,chicken,chicken_,chickens +1461:radio_receiver +1462:tobacco_pipe +1463:wall_clock +1464:fireplace +1465:dollar +1466:goal +1467:microwave +1468:soap +1469:bubble,bubbles +1470:megaphone +1471:purse +1472:player,players +1473:play +1474:dunk +1475:bargains +1476:pulpit +1477:bell_pepper +1478:escape +1479:bison +1480:buffalo +1481:suv +1482:lavender +1483:headphone +1484:company +1485:wooden_leg +1486:radio +1487:face +1488:drive +1489:unique +1490:corner +1491:engine,engines +1492:text +1493:liquidity +1494:icing +1495:florist +1496:dustpan +1497:campus +1498:newsstand +1499:hold +1500:lab +1501:cloak +1502:priority +1503:hindu +1504:flashlight +1505:locker +1506:wave,waves +1507:monster +1508:soup +1509:charity +1510:hammock,hammocks +1511:stroll +1512:lift,lifts +1513:sandwich +1514:greenery +1515:wreckage +1516:stream +1517:course +1518:marble +1519:midst +1520:craftsmanship +1521:word,words +1522:turnip +1523:tunnel +1524:destroyer +1525:greenhouse,greenhouses +1526:data +1527:buses +1528:young +1529:easyjet +1530:samsung +1531:somber +1532:cemetery +1533:showroom +1534:flavor,flavors +1535:referee +1536:bleacher,bleachers +1537:monkey,monkeys +1538:ski_pole +1539:snowboarder +1540:clutch +1541:tokyo +1542:astronaut,astronauts +1543:decoration,decorations +1544:runway +1545:perfume +1546:bookshelf +1547:tennis_ball +1548:eiffel +1549:guitarist +1550:stunt +1551:video +1552:lime +1553:fruit_juice +1554:Lego,lego +1555:alarm_clock +1556:bedding +1557:bedspread +1558:leaves +1559:saw +1560:plank +1561:buyuk +1562:bodyboard +1563:chocolate_bar +1564:chocolate +1565:sale +1566:wear +1567:parliament +1568:nightclub +1569:headset +1570:menu +1571:chalkboard +1572:fire_hose +1573:foundation +1574:elk +1575:bullet_train +1576:angel,angels +1577:queue +1578:osteria +1579:pitcher,pitcher_,pitchers +1580:beer_bottle +1581:place,places +1582:treat,treats +1583:turtle,turtles +1584:fairground +1585:cracker,crackers +1586:biscuit +1587:buddhist +1588:experience +1589:cymbal +1590:chariot +1591:acorn,acorns +1592:walnut,walnuts +1593:screwdriver +1594:screen,screens +1595:ruler +1596:yogurt +1597:cornmeal +1598:color,colors +1599:overall,overalls,overalls_ +1600:fisherman +1601:nursery +1602:donkey,donkeys +1603:razor +1604:app,apps +1605:fireplug +1606:dishrag +1607:mule +1608:shed +1609:planter,planters +1610:shelter +1611:icloud +1612:outside +1613:dealership,dealerships +1614:truffle,truffle_,truffles +1615:fudge +1616:sea +1617:history +1618:t-shirt +1619:facade,facades +1620:stilt,stilts +1621:toilet_tissue +1622:lid,lids +1623:jacuzzi +1624:bedcover +1625:generator +1626:weather +1627:airshow +1628:conference +1629:shade +1630:reef +1631:biplane +1632:penguin,penguins +1633:green,greens +1634:beer_can +1635:series +1636:beer,beers +1637:style,styles +1638:mermaid +1639:laundromat +1640:trombone +1641:trumpet,trumpets +1642:bass +1643:gorilla +1644:firework,fireworks +1645:graduates +1646:atrium +1647:snowboard +1648:information +1649:side,sides +1650:treatment +1651:certificate +1652:placard +1653:steak,steak_,steaks +1654:swing,swings +1655:tachometer +1656:sushi +1657:tuna +1658:saltshaker +1659:trampoline +1660:wheelie +1661:bulletin_board +1662:hairnet +1663:medical +1664:surgeons +1665:cricket,crickets 
+1666:cabbage,cabbages +1667:fiat +1668:chain +1669:eerie +1670:bass_horn +1671:band +1672:kilt +1673:drummer +1674:mesmerizing +1675:marigold +1676:thumbtack +1677:router,router_,routers +1678:paper_towel +1679:microscope,microscopes +1680:daybed +1681:storefront,storefronts +1682:branch +1683:ballet_skirt +1684:chess +1685:mobile +1686:gingerbread +1687:pharmacy +1688:wendy,wendys +1689:cryptocurrency +1690:lip_balm +1691:bee,bees +1692:bug +1693:crack +1694:attention +1695:miniskirt +1696:gift_wrap +1697:seaplane +1698:dining +1699:rain +1700:wine_bottle +1701:lunch +1702:monk,monks +1703:cave +1704:mausoleum +1705:banco +1706:bath,baths +1707:bat,bat_,bats +1708:halloween +1709:sidecar +1710:plywood +1711:cloth +1712:nike +1713:geranium,geraniums +1714:blackberry +1715:berry +1716:tart +1717:ladle +1718:façade +1719:time,times +1720:binder +1721:racer +1722:gravel +1723:brandenburger +1724:reed,reeds +1725:craft,crafts +1726:crossing +1727:open +1728:afterpay +1729:landfill +1730:shoreline +1731:racket,rackets +1732:activity +1733:blimp +1734:read +1735:lane,lanes +1736:inside +1737:chain_mail +1738:camcorder +1739:bookstore +1740:squash +1741:watermelon,watermelons +1742:gourd +1743:footstool +1744:desolate +1745:houseboat +1746:lobby +1747:spacecraft +1748:reclining +1749:register +1750:arcade +1751:driftwood +1752:oil_lamp +1753:flash +1754:cupcake +1755:assortment +1756:disaster +1757:mousepad +1758:pencil_sharpener +1759:vineyard,vineyards +1760:trevi +1761:wood,woods +1762:stairway +1763:butterfly +1764:feather,feathers +1765:halter_top +1766:infant +1767:bobby_pin +1768:pleasant +1769:ups +1770:exercise,exercises +1771:roundabout +1772:talent +1773:neighborhood +1774:boulder,boulders +1775:herbs +1776:hall +1777:captivating +1778:cycling +1779:stainless +1780:ben +1781:Volkswagen,volkswagen +1782:attraction,attractions +1783:voltage +1784:edible_corn +1785:corn +1786:churches +1787:piano +1788:wolf +1789:railcar,railcar_,railcars +1790:stage,stages +1791:size,sizes +1792:wheat,wheats +1793:argentina +1794:satchel +1795:lit +1796:gift +1797:cocktail +1798:measuring_cup +1799:emergency +1800:panda +1801:soya_milk +1802:frog +1803:ribbon,ribbons +1804:rapids +1805:café +1806:goldfish +1807:fishbowl +1808:juice,juices +1809:umpire +1810:baseball_glove +1811:catcher +1812:rope,ropes +1813:occasion,occasions +1814:screw +1815:underside +1816:niche +1817:garland +1818:action +1819:tranquil +1820:whatsapp +1821:messaging +1822:jet_plane +1823:note,notes +1824:pavilion +1825:catamaran +1826:seawall +1827:bullet,bullets +1828:rise +1829:wind_chime +1830:scaffold +1831:cash +1832:dusk +1833:yak +1834:movie +1835:cutlery +1836:plier,pliers +1837:badge,badges +1838:hiking +1839:leotard +1840:fi +1841:backyard +1842:puddle +1843:horn +1844:monastery +1845:catapult +1846:pear +1847:site +1848:crochet +1849:countryside +1850:fire_engine +1851:fireman +1852:projection +1853:balconies +1854:mary +1855:reception +1856:dancing +1857:gazelle +1858:habitat +1859:school_bus +1860:warship +1861:congestion +1862:arch +1863:stingray +1864:terrain +1865:mixer,mixer_,mixers +1866:presence +1867:handkerchief +1868:cistern +1869:shaving_cream +1870:tube,tubes +1871:tusk +1872:debris +1873:whipped_cream +1874:jelly +1875:sparkler_ +1876:seating +1877:lamborghini +1878:brand +1879:crayon +1880:hermitage +1881:football_helmet +1882:frosting +1883:lighting +1884:corridor +1885:vinegar +1886:arches +1887:pantyhose +1888:zebra,zebras +1889:shower_head +1890:ornate +1891:polka +1892:wand,wands +1893:wristband 
+1894:chimney,chimneys +1895:accordion +1896:headstall_ +1897:fujifilm +1898:writing +1899:conservatory +1900:result +1901:goose +1902:beehive +1903:maid +1904:mailboxes +1905:railroad +1906:barbie +1907:thai +1908:sportback +1909:pick +1910:coatrack +1911:yellow +1912:hand,hands +1913:coil +1914:f1 +1915:candy_cane +1916:boxing_glove +1917:skiing +1918:twitter +1919:dice +1920:die +1921:silverware +1922:pickle +1923:tiger +1924:mud +1925:ceramic +1926:bookmark +1927:twine +1928:setup +1929:go +1930:headquarter,headquarters +1931:shot +1932:golfer,golfers +1933:tortoise +1934:graduation +1935:sweat +1936:monarch +1937:heritage +1938:water_scooter +1939:tender +1940:wash +1941:sundial +1942:bath_mat +1943:pitchfork +1944:desktop +1945:lip +1946:inhaler +1947:surreal +1948:fox +1949:work +1950:bust,busts +1951:control,controls +1952:heater +1953:farm +1954:asml +1955:rodeo +1956:flipper,flipper_,flippers +1957:yard +1958:water_cooler +1959:chap +1960:slav +1961:syringe,syringes +1962:earplug +1963:cork,cork_,corks +1964:grinder +1965:lizard +1966:minaret,minarets +1967:hangar +1968:University,university +1969:clay +1970:fleece +1971:coastline +1972:crew +1973:snowstorm +1974:avocado +1975:commuter +1976:year,years +1977:missile +1978:ritual +1979:hang +1980:malaysia +1981:wikipedia +1982:television_camera +1983:dog_collar +1984:claw +1985:priest,priests +1986:ingredient,ingredients +1987:clown +1988:rug +1989:dumbbell +1990:mahal +1991:flea +1992:handcuff +1993:Anchor,anchor +1994:ice_skate +1995:hockey_stick +1996:brake_light +1997:dinosaur,dinosaurs +1998:drag +1999:Smoke,smoke +2000:inspiring +2001:jug,jugs +2002:holly +2003:igniter +2004:shore,shores +2005:freight +2006:hibiscus +2007:hollywoodreporter.com +2008:expanse +2009:dalmatian +2010:foggy +2011:bullfighting +2012:ginger +2013:type,types +2014:ballroom +2015:maintenance +2016:fawn +2017:enclosure +2018:steel +2019:set +2020:puzzle +2021:energy +2022:operating +2023:medieval +2024:well +2025:rugby +2026:police_cruiser +2027:uk +2028:mammoth +2029:australia +2030:moody +2031:beeper +2032:pork +2033:dishtowel +2034:spring,springs +2035:corvette +2036:sheet,sheets +2037:designer +2038:start +2039:nutcracker +2040:submarine +2041:stormy +2042:ham +2043:salmon,salmon_,salmons +2044:borobudur +2045:flame,flames +2046:concentration +2047:shepherd +2048:military +2049:birdcage +2050:zucchini +2051:concrete +2052:barn +2053:transporter +2054:breechcloth +2055:gemstone,gemstones +2056:teepee,teepees +2057:catedral +2058:bulletproof +2059:parasail,parasail_,parasails +2060:end +2061:mercedes +2062:freight_car +2063:cage +2064:waterfall,waterfalls +2065:stretcher +2066:paramedic,paramedics +2067:lighter +2068:tug +2069:cream +2070:mobility +2071:watering_can +2072:dollhouse +2073:serve +2074:hit +2075:Electronics,electronic,electronics +2076:iron,iron_,irons +2077:break +2078:road_map +2079:wing,wings +2080:violinist +2081:music +2082:graveyard +2083:drinking +2084:liquid +2085:arctic_ +2086:kiosk,kiosks +2087:fortress +2088:pegboard +2089:handsaw +2090:chairlift +2091:individuals +2092:electricity +2093:poultry +2094:condition,conditions +2095:pony +2096:corral +2097:vespa +2098:caravan +2099:date,date_,dates +2100:asters +2101:advertisement,advertisements +2102:loafer,loafers +2103:country +2104:spatula +2105:rugged +2106:slab +2107:barbed +2108:stark +2109:post +2110:slaw +2111:moon +2112:internet +2113:petal,petals +2114:swimming +2115:colander +2116:paper_plate +2117:armoire +2118:cappuccino +2119:nut,nuts +2120:nail,nails +2121:valley 
+2122:cactus +2123:appetizer +2124:bridal +2125:samurai +2126:sweet_potato +2127:incense +2128:gold,golds +2129:exit +2130:houseplant +2131:limo +2132:fly +2133:makeshift +2134:flute +2135:creatures +2136:file,file_,files +2137:loaf +2138:Clothes,clothe,clothes +2139:peace +2140:spider +2141:child +2142:compass +2143:driving +2144:kitchenware +2145:ladies +2146:clothespin +2147:name,names +2148:vantage +2149:fur,furs +2150:claus +2151:purpose,purposes +2152:ax +2153:hand_glass +2154:freedom +2155:skater,skaters +2156:beijing +2157:amplifier +2158:pirate_flag +2159:flight,flights +2160:elevator_car +2161:pill +2162:london +2163:tambourine +2164:pacifier +2165:blood +2166:gaming +2167:situation +2168:beak +2169:hairbrush +2170:vacuum_cleaner +2171:tote +2172:provoking +2173:rocking_chair +2174:convertible,convertible_,convertibles +2175:sleigh +2176:foot +2177:pit +2178:keg +2179:batch +2180:penny,penny_,pennys +2181:old +2182:illuminate +2183:domestic_ass +2184:feature,features +2185:minibus +2186:sawhorse +2187:appetizing +2188:wrestler,wrestlers +2189:match +2190:cello +2191:brandenburg +2192:strawman +2193:banyan +2194:arab +2195:courthouse +2196:bathrobe +2197:lemonade +2198:lace +2199:illustration +2200:booth,booths +2201:coaster +2202:converse +2203:shape +2204:zoom +2205:mine +2206:hilltop +2207:denim +2208:pepper_mill +2209:patty_ +2210:contest +2211:cabin,cabins +2212:swarovski +2213:sing +2214:pajama,pajamas +2215:taj +2216:vine,vines +2217:enchanting +2218:dune,dunes +2219:blender +2220:security +2221:hairpin +2222:hoodie +2223:phonograph_record +2224:vanity +2225:tangerine +2226:citrus +2227:orange_juice +2228:jungle +2229:grave,graves +2230:garlic +2231:beanbag +2232:skill,skills +2233:seoul +2234:pitch +2235:paintball +2236:eraser +2237:nosebag_ +2238:mechanic +2239:accident +2240:splatters +2241:blend +2242:steam +2243:charging +2244:ticket,tickets +2245:journey +2246:nerf +2247:beam,beams +2248:dense +2249:benches +2250:casino,casinos +2251:white +2252:nativity +2253:lifeguard +2254:hood,hoods +2255:fedora,fedoras +2256:wooden_spoon +2257:tableware +2258:mcdonald,mcdonalds +2259:policeman +2260:club +2261:center +2262:knitting_needle +2263:synagogue +2264:pantheon +2265:skeleton +2266:lawn_mower +2267:cuban +2268:kangaroo +2269:drawing +2270:salesforce +2271:cassette +2272:significance +2273:sunrise +2274:hanger +2275:phonebook +2276:muscle +2277:angle +2278:selection +2279:unicycle +2280:lifestyle +2281:robertusburg +2282:fixture +2283:locomotive +2284:landmark,landmarks +2285:tissue_paper +2286:armed +2287:decorate +2288:residents +2289:forum +2290:cafe,cafes +2291:nutella +2292:comic_book +2293:DVD,dvd +2294:pocketknife +2295:rainbow +2296:butter +2297:part,parts +2298:sony +2299:crumb +2300:new,news +2301:surf +2302:shaker +2303:canyon +2304:3i +2305:flagship +2306:yarn +2307:extinguisher +2308:parakeet +2309:glimpse +2310:attach +2311:sunlight +2312:compete +2313:moat +2314:ice_pack +2315:fintech +2316:rack,racks +2317:sell +2318:daring +2319:bonsai +2320:rhinoceros +2321:case,cases +2322:livestock +2323:chime +2324:marching +2325:typewriter +2326:olympic +2327:stir +2328:coastal +2329:musical_instrument +2330:season,seasons +2331:verdi +2332:expo +2333:branches +2334:butcher +2335:heel,heels +2336:clothesline +2337:kiwi +2338:mango +2339:pearl +2340:potatoes +2341:defender +2342:press +2343:santa +2344:ornament,ornaments +2345:shower_curtain +2346:towel_rack +2347:bulldog +2348:transportation +2349:rider,riders +2350:lamartine +2351:dirt_bike +2352:state +2353:pancake 
+2354:dam +2355:windsock +2356:spot +2357:Band_Aid +2358:waiter +2359:long +2360:banquet +2361:violin +2362:heron +2363:ambiance +2364:carving,carvings +2365:facility +2366:athlete,athletes +2367:birthday +2368:suspension +2369:clarinet +2370:kit +2371:recorder +2372:grove +2373:beachball +2374:trophies +2375:underclothes +2376:restroom +2377:wetland +2378:need +2379:wrapper,wrappers +2380:antengroup +2381:clasp +2382:advertising +2383:amphitheater +2384:airman +2385:dial,dials +2386:snake +2387:pool_table +2388:flowering +2389:caution +2390:warm +2391:cabinetry +2392:session +2393:Theatre,theatre,theatres +2394:run +2395:eyepatch +2396:bersih +2397:santander +2398:microwave_oven +2399:skylight +2400:bengal +2401:winding +2402:grandeur +2403:venue +2404:flamingo,flamingos +2405:stork +2406:sephora +2407:pleasing +2408:pretzel +2409:cement +2410:class +2411:playhouse +2412:underdrawers +2413:funnel +2414:leafy +2415:cookout +2416:rodent +2417:bandeau +2418:hummus +2419:orchid,orchids +2420:rig +2421:oil,oils +2422:components +2423:battle +2424:valve +2425:tomb,tombs +2426:crocodile,crocodiles +2427:asphalt +2428:full +2429:sparrow +2430:list +2431:frozen +2432:essence +2433:fume_hood +2434:cooking_utensil +2435:kitchen_sink +2436:spice_rack +2437:cider +2438:love +2439:shelve,shelves +2440:technology +2441:starfish +2442:kitchen_table +2443:dartboard +2444:foam +2445:rosary +2446:airways +2447:checkerboard +2448:countertop +2449:emblem +2450:poker,poker_,pokers +2451:blacksmith +2452:armadillo +2453:tub +2454:bougainvillea +2455:innocence +2456:stuff +2457:lasagna +2458:edge +2459:karting +2460:chloe +2461:judo +2462:veterans +2463:parka +2464:pet +2465:fil +2466:turkey,turkey_,turkeys +2467:hurdle +2468:oyster,oysters +2469:clam +2470:table-tennis_table +2471:tape_measure +2472:goalkeeper +2473:stationery +2474:clip +2475:lipstick +2476:tissue +2477:centerpiece +2478:figure,figures +2479:cadbury +2480:juventus +2481:hotplate +2482:whistle +2483:property +2484:maple +2485:cupboard +2486:parthenon +2487:brochure,brochures +2488:ketchup +2489:laundry +2490:birch +2491:terrarium +2492:crock_pot +2493:paperback +2494:shears +2495:bounds +2496:green_bean +2497:fresh +2498:bunny +2499:sponge +2500:disco +2501:check - in +2502:alsace +2503:modern +2504:shellfish +2505:narrow +2506:studio,studios +2507:rental +2508:chihuahua +2509:stapler,stapler_,staplers +2510:measure,measures +2511:wardrobe +2512:caboose +2513:dandelion +2514:paella +2515:griddle +2516:panorama +2517:department +2518:candy_bar +2519:seaside +2520:firefighter,firefighters +2521:teakettle +2522:cash_register +2523:snail,snails +2524:conveyor +2525:ponytail +2526:hogwart,hogwarts +2527:liverpool +2528:louvre +2529:document,documents +2530:message +2531:pose,poses +2532:marsh +2533:popolo +2534:choice +2535:stairwell +2536:shower_cap +2537:pollen +2538:sweatband +2539:sunglass +2540:jellyfish +2541:dresses +2542:crib +2543:iceberg,icebergs +2544:approach +2545:dairy +2546:peanut_butter +2547:coal +2548:dough +2549:disneyland +2550:roll,rolls +2551:notice +2552:bmw +2553:innut +2554:starting +2555:kbs +2556:athens +2557:racing +2558:peacock +2559:forge +2560:Toast,toast,toast_,toasts +2561:mp3 +2562:linen +2563:duvet +2564:friend,friends +2565:manga +2566:s10 +2567:playroom +2568:tinfoil +2569:brewery +2570:supplies +2571:amc +2572:daisy +2573:meadow +2574:tannery +2575:meatball +2576:deadbolt +2577:dhl +2578:utensil +2579:folder +2580:hbf +2581:music_stool +2582:diver,divers +2583:barbell +2584:headdresses +2585:service,services 
+2586:drying +2587:pomeranian +2588:safety_pin +2589:roofed +2590:manger +2591:nutrition +2592:magnifying +2593:sandstone +2594:bushes +2595:outfit,outfits +2596:hair_drier +2597:smoothie +2598:pattern,patterns +2599:shell,shells +2600:way +2601:reamer_ +2602:controller,controllers +2603:pirate +2604:whiteboard +2605:lecture +2606:torii +2607:teammate,teammates +2608:comb +2609:cool +2610:bundle,bundles +2611:cashew +2612:harvesting +2613:maxi +2614:moonlit +2615:sherbert +2616:bob +2617:Antiques,antique,antiques +2618:check +2619:foliage +2620:point +2621:feet +2622:circular +2623:hanging +2624:honey +2625:duffel +2626:macy +2627:gear,gears +2628:plow,plow_,plows +2629:home_plate_ +2630:batter,batter_,batters +2631:wallpaper +2632:latticework +2633:victory +2634:kiwi_fruit +2635:scoop +2636:meeting +2637:freshener +2638:epson +2639:ecommerce +2640:unicorn +2641:relaxation +2642:weisshaar +2643:present,presents +2644:character,characters +2645:amg +2646:commercial +2647:google +2648:visitors +2649:elevator +2650:thermos +2651:rag_doll +2652:soil +2653:pump +2654:antelope +2655:nun,nuns +2656:dahlia +2657:toyshop +2658:boiled_egg +2659:shallow +2660:spray +2661:classic +2662:nurse +2663:rainforest +2664:hornet +2665:gallery +2666:competition +2667:donation +2668:vegetation +2669:health +2670:aftermath +2671:intensity +2672:chip,chips +2673:nightlife +2674:lens +2675:relaxing +2676:emmy +2677:carnation +2678:mower +2679:sheepdog +2680:chessboard +2681:vodka +2682:baboon +2683:paradise +2684:geisha +2685:ant,ants +2686:behavior +2687:ghost +2688:procedure,procedures +2689:husky +2690:cleanliness +2691:glacier +2692:casket +2693:website +2694:thinkpad +2695:reenactment +2696:republic +2697:water_gun +2698:flexibility +2699:water_heater +2700:quiche +2701:exciting +2702:boom_microphone +2703:turnstile +2704:recycling +2705:kennel +2706:earth +2707:development +2708:crowbar +2709:beekeeper,beekeepers +2710:niagara +2711:polar_bear +2712:manatee +2713:hippopotamus +2714:petronas +2715:wintery +2716:cause +2717:delightful +2718:paint +2719:draw +2720:chaise_longue +2721:blossom,blossoms +2722:interview +2723:tortilla +2724:dutch +2725:artifact,artifacts +2726:kennedy +2727:checkbook +2728:tomatoes +2729:korean +2730:champagne +2731:porthole +2732:capsule +2733:system +2734:marches +2735:cornbread +2736:grey +2737:refugee +2738:downtown +2739:rear +2740:enthusiasts +2741:student,students +2742:hairstyle +2743:barbecue +2744:guard,guards +2745:purchase +2746:turret,turrets +2747:a cathedral +2748:lifeboat,lifeboats +2749:possum +2750:shaver,shaver_,shavers +2751:potter +2752:snowplow +2753:stereo,stereo_,stereos +2754:freeway +2755:process +2756:hot_sauce +2757:rainstorm +2758:holi +2759:porsche,porsches +2760:eel +2761:anvil +2762:crape +2763:fridge +2764:aromatherapy +2765:amphitheatre +2766:dimly +2767:four +2768:potholder +2769:microwave oven +2770:cheerleading +2771:coast +2772:smartwatch +2773:retaining +2774:courgette +2775:transfusion +2776:appearance +2777:alarm +2778:trick +2779:jukebox +2780:willow +2781:antler,antlers +2782:townhouses +2783:huawei +2784:jump +2785:barren +2786:bra +2787:medal,medals +2788:tombstone,tombstones +2789:dirty +2790:cartoon +2791:rein,reins +2792:industry +2793:lives +2794:braid +2795:amusing +2796:watering +2797:sauce,sauces +2798:flip +2799:circus +2800:driver,drivers +2801:rooster +2802:viewpoint +2803:fern,ferns +2804:scarecrow +2805:persimmon +2806:media +2807:Crabs,crab,crab_,crabs +2808:hydrangea,hydrangeas +2809:crowwell +2810:tsmc +2811:condom 
+2812:carabine +2813:durian +2814:coffeepot +2815:console +2816:shower +2817:tofu +2818:leaf +2819:trim +2820:americas +2821:participant,participants +2822:massage +2823:cell +2824:apostles +2825:seal +2826:catfish +2827:golf_club +2828:razorblade +2829:brush +2830:lamb,lamb_,lambs +2831:speedway +2832:climate +2833:clinic +2834:cib +2835:cascades +2836:skating +2837:grassland +2838:throne +2839:rim +2840:octopus +2841:jaar +2842:leisure +2843:stalks +2844:dry +2845:chainsaw +2846:review +2847:tradução +2848:sagrada +2849:bean,beans +2850:anniversary +2851:inviting +2852:intimate +2853:mtv +2854:lodge +2855:importance +2856:artichoke +2857:CD_player +2858:crash +2859:alpaca +2860:eden +2861:tuk,tuks +2862:photographer,photographers +2863:armada +2864:trellis +2865:sailing +2866:pine +2867:engaging +2868:walmart +2869:pull +2870:attic +2871:combination_lock +2872:storage +2873:bunk +2874:beetle +2875:bureau +2876:dormitory +2877:symbol,symbols +2878:demon +2879:cleaning +2880:reichsbahn +2881:grasshopper +2882:perform +2883:fry +2884:hollywood +2885:secure +2886:baggage +2887:conversation +2888:delays +2889:mission +2890:hydrant +2891:flyer +2892:facebook +2893:Starbucks,starbucks +2894:basilica +2895:challenging +2896:steering +2897:backhoe +2898:connection +2899:deichmann +2900:double +2901:excitement +2902:salsa +2903:compost +2904:abundance +2905:black_sheep +2906:reminder +2907:crouton +2908:nose +2909:radish +2910:ipad,ipads +2911:welcoming +2912:kitten,kittens +2913:needle +2914:seahorse +2915:leash +2916:clearing +2917:creation,creations +2918:mini +2919:shot_glass +2920:infirm +2921:exposure +2922:misty +2923:cluster +2924:cheer +2925:airy +2926:mickey +2927:first-aid_kit +2928:medication +2929:shift +2930:patient +2931:operate +2932:routine,routines +2933:afro +2934:infinity +2935:scrap,scraps +2936:slime +2937:powerpoint +2938:contrasting +2939:vagina +2940:cayenne,cayenne_,cayennes +2941:network +2942:usd +2943:fume +2944:passageway +2945:bacon +2946:sugar_bowl +2947:croatia +2948:comforter +2949:gymnastics +2950:trench_coat +2951:underwater +2952:gymnast +2953:base +2954:integration +2955:chipps +2956:powder +2957:fashion +2958:playpen +2959:merry +2960:kampot +2961:barber +2962:carrefour +2963:treadmill +2964:determination +2965:hand_towel +2966:Raindrops,raindrop,raindrops +2967:chest +2968:basin +2969:seesaw +2970:egg_yolk +2971:arbor +2972:residence +2973:ford +2974:saucepan +2975:totem +2976:abacus +2977:cord +2978:guys +2979:device,devices +2980:playingfield +2981:nightshirt +2982:bib +2983:toaster_oven +2984:syrup +2985:cowbell +2986:orangutan,orangutans +2987:viewing +2988:olive +2989:grid +2990:contents +2991:hedgehog +2992:levi +2993:sight,sights +2994:tosoh +2995:rialto +2996:wakeboard +2997:facilities +2998:gravy +2999:manhattan +3000:milkshake +3001:jelly_bean +3002:jelly bean +3003:owner +3004:seafloor +3005:airline,airlines +3006:recording +3007:darth +3008:quarry +3009:cosplay +3010:biology +3011:machete +3012:stuffed +3013:pothole +3014:hair_dryer +3015:sharm +3016:world +3017:price,prices +3018:war +3019:alien +3020:rusty +3021:can_opener +3022:holder +3023:tarot +3024:grate +3025:dust +3026:tournament +3027:igloo +3028:union +3029:woa +3030:functioning +3031:bartender +3032:untidy +3033:popsicle +3034:damage +3035:slum +3036:artillery +3037:manchester +3038:half +3039:reel +3040:cricketer +3041:Brownies,brownie,brownies +3042:grater +3043:shard +3044:zara +3045:leeks +3046:practice,practices +3047:gullfoss +3048:leons +3049:fall +3050:stew +3051:grapefruit 
+3052:amazon +3053:amount +3054:ad +3055:pre +3056:gaza +3057:removal +3058:pomegranate +3059:small +3060:bath_towel +3061:hole,holes +3062:cheese +3063:calligraphy +3064:duty +3065:balance,balances +3066:ferrari +3067:enterprise +3068:clifton +3069:cereal +3070:twin +3071:packet,packets +3072:blowfish +3073:blind +3074:shadow,shadows +3075:society +3076:bread-bin +3077:emergencies +3078:xylophone +3079:plan +3080:roast +3081:collar +3082:milka +3083:disposal +3084:coffin +3085:vat +3086:string,strings +3087:availability +3088:tiananmen +3089:pilot,pilots +3090:collage +3091:storm +3092:nighttime +3093:dishwasher +3094:snowshoe +3095:one,ones +3096:costa +3097:rover +3098:cypress +3099:footstall +3100:pride +3101:egret +3102:milky +3103:ballerina +3104:aktiengesellschaft +3105:drain +3106:scrubbing_brush +3107:degree +3108:shredder,shredder_,shredders +3109:ephesus +3110:handful +3111:legume +3112:stethoscope +3113:vastness +3114:CD,cd +3115:disc +3116:reno +3117:mill +3118:joystick +3119:chevrolet +3120:piggy +3121:edmonton +3122:vigil +3123:burrito +3124:bitcoin +3125:brownstone +3126:macaque +3127:birthday_cake +3128:complex +3129:corkscrew +3130:entertainment +3131:chipmunk +3132:poppy +3133:confetti +3134:witch +3135:tailor +3136:temperature +3137:coach +3138:swivel +3139:nailfile +3140:tori +3141:canteen +3142:direction,directions +3143:banjo +3144:bud +3145:china +3146:snowmen +3147:efforts +3148:toronto +3149:babies +3150:kebab +3151:car_battery +3152:balancer +3153:pasture +3154:invitation +3155:cockpit +3156:martini +3157:brooklyn +3158:telus +3159:Waffles,waffle,waffles +3160:laser +3161:tabby +3162:reality +3163:celebrate +3164:germany +3165:raven +3166:predators +3167:videotape +3168:gothic +3169:preparation +3170:mangrove +3171:plain +3172:actor +3173:scouting +3174:charger +3175:venice +3176:ditch +3177:jal +3178:hen +3179:pianist +3180:subaru +3181:t-72 +3182:carr +3183:cheerleader,cheerleaders +3184:charm +3185:torch +3186:peak,peaks +3187:matchbox +3188:cultivation +3189:squid,squid_,squids +3190:succulent,succulents +3191:quadcopter +3192:dentist +3193:impression +3194:caliper +3195:duckling +3196:chickpea +3197:athletic +3198:tim +3199:vacuum +3200:disney +3201:assembly +3202:mess +3203:pheasant +3204:karlovy +3205:handlebars +3206:refreshing +3207:lobster,lobsters +3208:tattoo +3209:boar +3210:exchange +3211:stock +3212:safari +3213:sharpie +3214:birthday_card +3215:Tabasco_sauce +3216:peterbilt +3217:search +3218:tel +3219:justice +3220:atomizer +3221:lily +3222:mulch +3223:rake +3224:hyundai +3225:bloomberg +3226:dubai +3227:burj +3228:bean_curd +3229:denmark +3230:mussel +3231:playtime +3232:hunting +3233:knit +3234:a net +3235:antarctica +3236:passersby +3237:tulip,tulips +3238:murder +3239:warmth +3240:glaze +3241:mashed_potato +3242:sour_cream +3243:subwoofer +3244:moss +3245:centipede +3246:cute +3247:chalice +3248:observatory +3249:tanker +3250:cigar,cigars +3251:saxophonist +3252:sax +3253:crest +3254:harbour +3255:sashimi +3256:thrift +3257:calf +3258:masai +3259:clogs +3260:cafeteria +3261:raceway +3262:omelet +3263:sneaker,sneakers +3264:shelving +3265:arrival,arrivals +3266:hiker,hikers +3267:opel +3268:j +3269:woodpecker +3270:michael +3271:refinery +3272:privata +3273:chalk,chalks +3274:cherokee +3275:clementine +3276:doctor,doctors +3277:bridle +3278:toiletry +3279:lock,locks +3280:convoy +3281:brake +3282:scuba +3283:historic +3284:superhero +3285:metro +3286:cinema +3287:investors +3288:take +3289:domino +3290:junk +3291:multitude +3292:moor 
+3293:chrysanthemum,chrysanthemums +3294:daisies +3295:boutique +3296:servicescape +3297:extent +3298:rat +3299:formula +3300:bulb,bulbs +3301:prison +3302:harley +3303:arm,arms +3304:hug +3305:outage +3306:government +3307:tide +3308:demolition +3309:puppy +3310:outlet,outlets +3311:juggle +3312:mode +3313:hatbox +3314:crop,crops +3315:blaster +3316:teacher +3317:johnny +3318:fungi +3319:form +3320:shotgun +3321:prada +3322:selfridges +3323:bubble_gum +3324:passage +3325:columbia +3326:fight +3327:claim +3328:bodybuilding +3329:radion +3330:perch +3331:birdfeeder +3332:bumblebee +3333:eggbeater +3334:letter,letters +3335:rendering +3336:rhododendron +3337:deliver +3338:susans +3339:axe,axes +3340:comfort +3341:king +3342:cooper +3343:article,articles +3344:diverse +3345:rv +3346:timeless +3347:resilience +3348:bosphorus +3349:dumpling,dumplings +3350:seaweed +3351:cristiano +3352:combination +3353:crescent_roll +3354:h +3355:cleanup +3356:landscaping +3357:boxes +3358:circa +3359:aqueduct +3360:tropical +3361:blooming +3362:picket +3363:garment,garments +3364:personnel +3365:winery +3366:velvet +3367:skytopolis +3368:centre +3369:dump +3370:gardening +3371:prayer +3372:silhouette,silhouettes +3373:ecosport +3374:fitness +3375:dealers +3376:hookah +3377:businesses +3378:workstation +3379:adventure +3380:preacher +3381:riot +3382:messenger +3383:maritime +3384:egg_roll +3385:bridesmaid,bridesmaids +3386:inscription +3387:fig +3388:fiverr +3389:layer +3390:toll +3391:shuttle +3392:meerkat +3393:boosters +3394:vial,vials +3395:khaki +3396:budgeting +3397:water_faucet +3398:prowess +3399:pug-dog +3400:knex +3401:branding +3402:scythe +3403:build +3404:treasure +3405:grain +3406:illumio +3407:effect,effects +3408:fossil +3409:ping-pong_ball +3410:sander +3411:appointment,appointments +3412:vatican +3413:sill +3414:signpost +3415:file_cabinet +3416:tribute +3417:euro,euros +3418:fedex +3419:space_shuttle +3420:tuxedo +3421:falcon +3422:texture +3423:dragonfly +3424:leopard +3425:walker +3426:memorabilia +3427:entertainer +3428:embrace +3429:badminton +3430:printing +3431:blade +3432:pea,pea_,peas +3433:quaint +3434:sprinkle +3435:towering +3436:positions +3437:bloom,blooms +3438:welding +3439:mint_candy +3440:calendula +3441:countries +3442:rainfall +3443:others +3444:glider +3445:aerosol +3446:makeup +3447:clipper,clippers,clippers_ +3448:sip +3449:czech +3450:lay +3451:leg,legs +3452:prix +3453:virgin +3454:airpods +3455:couches +3456:effectiveness +3457:kitty +3458:crow +3459:gourmet +3460:skype +3461:context +3462:bookshop +3463:clutch_bag +3464:souvenir,souvenirs +3465:iguana +3466:kick +3467:prague +3468:urinal +3469:crime +3470:blossoming +3471:oakley +3472:waste +3473:potiers +3474:spiral +3475:piazza +3476:scallop +3477:macaw +3478:inter +3479:olive_oil +3480:yurt +3481:stable +3482:contractors +3483:baseball_base +3484:families +3485:broadcasting +3486:transition +3487:stanford +3488:curling_iron +3489:traveler,travelers +3490:popcorn +3491:candies +3492:nvidia +3493:test +3494:kneels +3495:address +3496:india +3497:community +3498:kfc +3499:hospice +3500:pendulum +3501:vacation +3502:comminity +3503:diving_board +3504:industrial +3505:metrocard +3506:benz +3507:whiskey +3508:provider +3509:unit +3510:neckband +3511:lindbergh +3512:panerai +3513:gnome +3514:champion,champions +3515:dream,dreams +3516:stretch +3517:flakes +3518:shoulder,shoulders +3519:payment +3520:battleship +3521:bet +3522:burst +3523:cutout +3524:appreciation +3525:coinex +3526:sleep +3527:reading +3528:investment 
+3529:kart,karts +3530:walgreens +3531:earthquake +3532:quran +3533:balenciaga +3534:banking +3535:fagioli +3536:shipwreck +3537:genex +3538:Shakespeare +3539:pilgrim +3540:online +3541:ripe +3542:boxer,boxers +3543:vaccination +3544:vodafone +3545:rangoli +3546:coventry +3547:dell +3548:mustard +3549:potential +3550:chocolate_cake +3551:gateway +3552:doha +3553:visa +3554:use,uses +3555:koi +3556:st +3557:remnant,remnants +3558:encounter +3559:food_processor +3560:botanist +3561:scores +3562:aston +3563:vivid +3564:lug +3565:capitol +3566:burnt +3567:bentley +3568:ponce +3569:keng +3570:guillotine +3571:odeon +3572:dusty +3573:poland +3574:rubble +3575:lockdown +3576:seminar +3577:€ 1bn +3578:democracy +3579:homeless +3580:sling,sling_,slings +3581:photography +3582:stocking +3583:cove +3584:creme +3585:salvation +3586:renovation,renovations +3587:porcelain +3588:retailer +3589:geography +3590:gameboard +3591:block,blocks +3592:autumnal +3593:atmos +3594:mint,mints +3595:hobbit +3596:injection +3597:rohingya +3598:telephoto_lens +3599:ideology +3600:jetblue +3601:rod +3602:a mural +3603:soup_bowl +3604:libra +3605:pack,packs +3606:mandarin_orange +3607:jog +3608:chevy +3609:wadi +3610:alcove +3611:limousine +3612:teeth +3613:lead +3614:timberland +3615:premiere +3616:mandala +3617:centrifuge +3618:stylist +3619:durability +3620:cheetah +3621:engineering +3622:lisbon +3623:move,moves +3624:highchair +3625:browser +3626:sweet,sweets +3627:kansas +3628:corkboard +3629:rex +3630:humanatura +3631:siem +3632:sharpener +3633:wedding_cake +3634:toca +3635:juicy +3636:hoka +3637:conflict +3638:ferret +3639:billiard +3640:proximity +3641:charles +3642:beaker +3643:burning +3644:microsoft +3645:swab +3646:mi +3647:funeral +3648:victims +3649:montblanc +3650:soupspoon +3651:bites +3652:census +3653:orchestra +3654:logistics +3655:magnolia +3656:bullring +3657:pigtail +3658:impact +3659:dangote +3660:tranquility +3661:combine +3662:repair +3663:cream_pitcher +3664:prop +3665:foil +3666:steamy +3667:fluffy +3668:procentown +3669:bill,bills +3670:zombie +3671:stonework +3672:brooch +3673:abesses +3674:playing +3675:twilight +3676:snorkel +3677:escargot +3678:chamber +3679:denominations +3680:starry +3681:synergy +3682:abbercrombie +3683:dji +3684:daughter,daughters +3685:artist +3686:bafta +3687:noodle,noodles +3688:fishmongers +3689:hourglass +3690:rolling_pin +3691:mortar +3692:plaster +3693:heineken +3694:patent +3695:bw +3696:achievement,achievements +3697:renault +3698:chocolate_milk +3699:charcoal +3700:huddle +3701:digger +3702:multiple +3703:potala +3704:captain +3705:range +3706:brazil +3707:guidebook +3708:b +3709:leafless +3710:hamster +3711:thimble +3712:livery +3713:tackle +3714:mitsubishi +3715:experiment +3716:wrap +3717:tallit +3718:fog +3719:footprints +3720:hippo +3721:schoolgirls +3722:jail +3723:surveying +3724:delivery +3725:pasta +3726:bunk_bed +3727:complexes +3728:relief,reliefs +3729:homework +3730:record +3731:ecuador +3732:demonstrator +3733:$ 1bn +3734:longtail +3735:afternoon +3736:turn,turns +3737:chase +3738:pylons +3739:puffer +3740:motorsports +3741:haircut +3742:tooth +3743:kingsbau +3744:flops +3745:roadside +3746:thru +3747:merchandise +3748:sewer +3749:dot,dots +3750:manner,manners +3751:surrey +3752:chisel +3753:beachgoers +3754:wat +3755:change +3756:casserole +3757:vuitton +3758:scheme +3759:super +3760:austria +3761:chanel +3762:supercar +3763:zuora +3764:tasks +3765:introduction +3766:dental_floss +3767:depot +3768:opponents +3769:piggy_bank +3770:celebratory 
+3771:flare,flares +3772:brickwork +3773:nectarine +3774:hula +3775:comforting +3776:dagger +3777:florida +3778:otter +3779:wilderness +3780:web +3781:riverbank +3782:volunteer +3783:york +3784:substation +3785:victoria +3786:flask +3787:smokestack,smokestacks +3788:honor +3789:push +3790:cosmonaut +3791:copper +3792:brewing +3793:menorah +3794:adidas +3795:stain +3796:fit +3797:salt +3798:audi +3799:sensors +3800:aspects +3801:navigation +3802:cooker +3803:cold +3804:big +3805:operation +3806:moose +3807:depths +3808:gator +3809:hoe +3810:reichstag +3811:infrastructure +3812:abstract +3813:awareness +3814:barcelona +3815:bvlgari +3816:maternity +3817:mac +3818:treasury +3819:supply +3820:public +3821:grade +3822:mallard +3823:cbb +3824:chalet +3825:maze +3826:hoop,hoops +3827:azure +3828:establishment,establishments +3829:dickens +3830:scroll +3831:summer +3832:delicacies +3833:participation +3834:creepy +3835:volcano +3836:allianz +3837:ban +3838:minnie +3839:cisco +3840:competitor +3841:philips +3842:aloe +3843:crosses +3844:mexico +3845:loincloth +3846:handstand +3847:strawberries +3848:congregation +3849:beachside +3850:lemur +3851:rhine +3852:vacant +3853:saint,saints +3854:gt2 +3855:signing +3856:chemical,chemicals +3857:dinner +3858:exploration +3859:trafigura +3860:treehouse +3861:slippery +3862:satay +3863:spread +3864:weed +3865:brass +3866:confidence +3867:paddy +3868:july +3869:insulation +3870:anemone +3871:cheesecake +3872:hind +3873:blueberries +3874:queen +3875:lifting +3876:level,levels +3877:height +3878:pike +3879:lilies +3880:hustle +3881:call +3882:firefighting +3883:outlook +3884:baker +3885:bimex +3886:samoyed +3887:reflect +3888:phonograph +3889:checkpoint +3890:tiktok +3891:eye,eyes +3892:response +3893:poinsettia +3894:broadway +3895:wildebeest +3896:coca +3897:cep +3898:punch +3899:backup +3900:liberty +3901:walkie +3902:caramel +3903:berkshka +3904:clover +3905:reptile +3906:triumph +3907:fragrances +3908:slot +3909:amad +3910:marvel,marvels +3911:fancy +3912:airship +3913:sound +3914:hide,hides +3915:futon +3916:sailor +3917:montreal +3918:murraya +3919:cellar +3920:deep +3921:russia +3922:tinder +3923:playstation +3924:vienna +3925:bta +3926:mail +3927:triathlon +3928:pakistani +3929:gelatin +3930:citywalk +3931:diner +3932:tax +3933:bowler +3934:finger,fingers +3935:graph +3936:investigation +3937:jetliner +3938:wildlife +3939:dedication +3940:riverbed +3941:striking +3942:nap +3943:dv +3944:installation +3945:basement +3946:phoenix +3947:bunker +3948:hatta +3949:channel +3950:waltz +3951:fieglhiller +3952:genesis +3953:son +3954:hong +3955:breast +3956:isolation +3957:dalhousie +3958:pudding +3959:revenue +3960:belgrado +3961:peeler_ +3962:alcazar +3963:inflation +3964:september +3965:applesauce +3966:rolex +3967:haystack +3968:foreground +3969:gecko +3970:shake +3971:economy +3972:graham +3973:hair_curler +3974:teenagers +3975:milford +3976:pipe_bowl +3977:daylight +3978:petunia,petunias +3979:indonesia +3980:target,targets +3981:labor +3982:yolk +3983:udder +3984:florence +3985:graze +3986:collibra +3987:cigna +3988:observation +3989:signal +3990:bolivia +3991:pennies +3992:simple +3993:nissan +3994:xnxx +3995:vulture +3996:perches +3997:hunter +3998:quartz +3999:record_player +4000:suburb +4001:pavillon +4002:avia +4003:jay +4004:oasis +4005:manufacturing +4006:fitbit +4007:roadster +4008:measuring +4009:hazy +4010:networking +4011:pizzeria +4012:cameo +4013:ironwork +4014:deutsche +4015:embroidery +4016:drug,drugs +4017:snowblower +4018:creek 
+4019:surfing +4020:regal +4021:mandarin +4022:nation,nations +4023:marketing +4024:careless +4025:fundraiser +4026:coleslaw +4027:kids +4028:update +4029:pokemon +4030:firewood +4031:humboldt +4032:quality +4033:jaguar +4034:styrofoam +4035:publication +4036:discovery +4037:myer +4038:transformer,transformers +4039:toyota +4040:machinery +4041:domed +4042:outback +4043:motel +4044:wool +4045:blank +4046:manicure +4047:stanley +4048:gray +4049:clinique +4050:roadway +4051:juicer +4052:issue,issues +4053:blur +4054:fidelity +4055:nostalgic +4056:cola +4057:lenovo +4058:crawfish +4059:caterpillar +4060:qatar +4061:migrants +4062:relaxed +4063:vanilla +4064:amalfi +4065:route +4066:goalpost +4067:thistle +4068:aid +4069:novels +4070:running +4071:some +4072:dior +4073:morocco +4074:skin +4075:trip +4076:culture +4077:soy +4078:bma +4079:birth +4080:sunnuck +4081:shopify +4082:honda +4083:yahoo +4084:tartlets +4085:startup +4086:mixture +4087:tense +4088:furry +4089:peony +4090:pod +4091:border +4092:schloss +4093:buckingham +4094:harvest +4095:pepco +4096:tabletop +4097:icy +4098:transport +4099:siemens +4100:research +4101:millennium +4102:wrestle +4103:bumper +4104:researcher +4105:husband +4106:zurichchina +4107:flooring +4108:dj +4109:moscow +4110:growth +4111:seed,seeds +4112:lottery +4113:depicting +4114:barnes +4115:singers +4116:alibaba +4117:grammy,grammys +4118:thick +4119:cut,cuts +4120:sams +4121:heuer +4122:Wii +4123:july,2017 +4124:workout +4125:gavel +4126:floss +4127:varieties +4128:shipment +4129:bustle +4130:daytime +4131:energizer +4132:budhi +4133:valentino +4134:unity +4135:skincare +4136:pantry +4137:aurora +4138:prepaid +4139:hilton +4140:blash +4141:centrify +4142:karen +4143:cyber +4144:gucci +4145:neat +4146:smile,smiles +4147:fillmore +4148:filmores +4149:insurance +4150:acropolis +4151:precaution +4152:fuchsia +4153:access +4154:bakin +4155:print +4156:roach +4157:bt5 +4158:examination +4159:surgery +4160:relax +4161:helipad +4162:source +4163:grasses +4164:diary +4165:rum +4166:tradingview +4167:outing +4168:respect +4169:cradle +4170:nordstrom +4171:disk +4172:sewing +4173:tesla +4174:cheerful +4175:thailand +4176:gerbera +4177:caxa +4178:milhões +4179:supercenter +4180:politicians +4181:chicago +4182:capital +4183:cities +4184:venetian +4185:gundam +4186:moving +4187:pebble +4188:patrons +4189:tyne +4190:cinemax +4191:sugar +4192:crater +4193:breakfast +4194:incubator +4195:nozzle +4196:cocoa,cocoa_,cocoas +4197:rights +4198:farmland +4199:mumbai +4200:walking +4201:trinkets +4202:pure +4203:suites +4204:pawn +4205:balvenie +4206:nathan +4207:salad_plate +4208:algarve +4209:millions +4210:hardback_book +4211:century +4212:revera +4213:snapchat +4214:contain +4215:bangkok +4216:artistry +4217:gin +4218:humvee +4219:amtek +4220:embassy +4221:galeria +4222:tap +4223:vaccine +4224:vancouver +4225:pepsi +4226:support +4227:convenient +4228:spans +4229:miss +4230:weeknd +4231:rest +4232:lloyd +4233:taco +4234:nes +4235:origami +4236:oscars +4237:learn +4238:triomphe +4239:belgium +4240:horseman +4241:coupang +4242:quote +4243:countless +4244:retro +4245:boarding +4246:slate +4247:story +4248:imports +4249:pharmacies +4250:grazing +4251:fir +4252:skittles +4253:thing,things +4254:shoelace +4255:inkpad +4256:rhinoceroses +4257:solutions +4258:zte +4259:snowdrops +4260:france +4261:Scotiabank +4262:charter +4263:magpie +4264:oak +4265:eatery +4266:wearing +4267:ticketing +4268:tiber +4269:prom +4270:voter +4271:editor +4272:incident +4273:bait +4274:contributions 
+4275:means +4276:evergreen +4277:boiler,boilers +4278:brussels +4279:satellite +4280:bullfighter +4281:pokémon +4282:stripe +4283:lidl +4284:midpoint +4285:canon +4286:instagram +4287:twig +4288:relocation +4289:murky +4290:raccoon +4291:curvy +4292:marijuana +4293:puff +4294:panama +4295:sandcastles +4296:floral +4297:little +4298:juxtaposition +4299:sphinx +4300:tonnons +4301:bark +4302:blurry +4303:tea_bag +4304:sheikh +4305:blonde +4306:wrist +4307:tobacco +4308:griffith +4309:province +4310:streaming +4311:squeeze +4312:obcomm +4313:handout +4314:wisteria +4315:toiletries +4316:masher +4317:slice +4318:musket +4319:wfp +4320:technician +4321:toucan +4322:shooting +4323:prune +4324:production +4325:fillets +4326:ameritrade +4327:azad +4328:sheaf +4329:webpage +4330:sofa_bed +4331:neptunes +4332:scotland +4333:joyful +4334:benedict +4335:science +4336:coaches +4337:ventilator +4338:alps +4339:danger,dangers +4340:section +4341:college +4342:meditating +4343:humans +4344:alfa +4345:pollution +4346:election +4347:sandwiches +4348:southwest +4349:distillery +4350:newfoundland +4351:bonita +4352:irrigation +4353:commemorates +4354:hbo +4355:user +4356:galleria +4357:sauna +4358:blacksmithing +4359:repellent +4360:evolutionfit +4361:doge +4362:kazakhstan +4363:mysteries +4364:drift +4365:trapeze +4366:language +4367:foosball +4368:electrolux +4369:choir +4370:two fish +4371:motherboard +4372:greek +4373:fiction +4374:armour +4375:blindfolds +4376:bales +4377:merlion +4378:bishop +4379:raisin +4380:district +4381:loft +4382:empire +4383:deluxe +4384:mass +4385:christie +4386:codes +4387:teamwork +4388:workings +4389:valmont +4390:shantytown +4391:netherlands +4392:employees +4393:harvester +4394:rust +4395:lounging +4396:magsafe +4397:wonder +4398:jazz +4399:density +4400:coop +4401:cleat_ +4402:kirin +4403:baguette +4404:chart +4405:carburetor +4406:latam +4407:gum +4408:staszow +4409:hsbc +4410:mantle +4411:crystal +4412:beachfront +4413:clemson +4414:khalifa +4415:dwellings +4416:hero +4417:cub,cub_,cubs +4418:academy +4419:sunsweet +4420:tots +4421:harness +4422:dressing +4423:screening +4424:coral +4425:sprint +4426:lineup +4427:hugo +4428:streetcar +4429:Verizon,verizon +4430:smoking +4431:compair +4432:launcher +4433:acquisisce +4434:graphic,graphics +4435:duct +4436:cosmos +4437:muslim +4438:pencil_box +4439:shepherd_dog +4440:table-tennis +4441:hamas +4442:kremlin +4443:spin +4444:totos +4445:peugeot +4446:pointing +4447:ces +4448:functionality +4449:combat +4450:muzzle +4451:vapormax +4452:order +4453:adultery +4454:veganas +4455:douro +4456:Sainsburys +4457:anna +4458:transaction +4459:john +4460:handrail +4461:footballer +4462:travelodge +4463:maxx +4464:posing +4465:death +4466:26bn +4467:standing +4468:microprocessor +4469:casing +4470:tissot +4471:trading +4472:Superdrug +4473:getty +4474:wales +4475:transit +4476:cartridge +4477:pegaworld +4478:fist +4479:chainlink +4480:swinge +4481:future +4482:handmade +4483:powerball +4484:striped +4485:por +4486:care +4487:president +4488:vattenfall +4489:representation +4490:ratio +4491:birthplace +4492:homemade +4493:angkor +4494:simpsons +4495:launching +4496:meditation +4497:quick +4498:inventory +4499:ventilation +4500:circo +4501:packaging +4502:zone +4503:steep +4504:palanquin +4505:bleak +4506:pastries +4507:citigroup +4508:monopoly +4509:romania +4510:workplace +4511:selfie +4512:sus +4513:karl +4514:thehindu +4515:feel +4516:carbon +4517:mizuno +4518:shenzhen +4519:thumbs +4520:lundunata +4521:matters +4522:pita_ +4523:lingerie 
+4524:cinnamon +4525:karate +4526:racquet +4527:leader +4528:hershey +4529:usda +4530:blower +4531:semi +4532:grind +4533:religious +4534:colonnades +4535:diamond +4536:snowflake +4537:paddleboard +4538:empty +4539:macbook +4540:hypercar +4541:cliffside +4542:moisturizer +4543:pilgrimage +4544:various +4545:guacamole +4546:wellness +4547:jakarta +4548:picker +4549:rejection +4550:album,albums +4551:win +4552:noccibe +4553:sideline +4554:arizona +4555:banister +4556:tapawera +4557:symphony +4558:bistro +4559:dick +4560:Tastes +4561:hydro +4562:majority +4563:mbc +4564:snowball +4565:engie +4566:nasdaq +4567:Oneplus +4568:sandisk +4569:bowling +4570:ukraine +4571:swells +4572:macaroni +4573:act +4574:blow +4575:fuel +4576:humayun +4577:woodworking +4578:magician +4579:enfield +4580:lotterias +4581:lg +4582:europa +4583:furrows +4584:kiev +4585:strasbourg +4586:hello +4587:greece +4588:handling +4589:distancing +4590:feast +4591:tribe +4592:loop,loops +4593:climber +4594:mri +4595:shipyard +4596:index +4597:manuscript +4598:vegas +4599:hummingbird +4600:malay +4601:vaporizer +4602:spooky +4603:swarm +4604:microsystems +4605:bedpan +4606:reporter +4607:palestine +4608:handicap +4609:hardtop +4610:decathlon +4611:birkenau +4612:guinness +4613:donnarumma +4614:servicepoint +4615:cardinals +4616:fisheye +4617:dip +4618:michigan +4619:defibrillator +4620:destruction +4621:processing +4622:brawl +4623:rockstar +4624:algae +4625:wargame +4626:pont +4627:bouncy +4628:throw +4629:campaign +4630:opulent +4631:multimeter +4632:gps +4633:discount +4634:climbing +4635:weight,weights +4636:technologies +4637:tasty +4638:neglect +4639:savanna +4640:mariachi +4641:guests +4642:iowa +4643:goalie +4644:ages +4645:grocers +4646:wealth +4647:cranberries +4648:nugget +4649:drumming +4650:clash +4651:turquoise +4652:makita +4653:kiss +4654:express +4655:petrofac +4656:engraving +4657:karcher +4658:pikachu +4659:cvs +4660:pug +4661:gymnasium +4662:moto +4663:gardener +4664:total +4665:hawk +4666:stix +4667:ios +4668:decay +4669:Macarons,macarons +4670:pavers +4671:shoppee +4672:crumbling +4673:evangelists +4674:chefchaouen +4675:poodle +4676:reliance +4677:lying +4678:filtering +4679:specialties +4680:itunes +4681:processor +4682:imposing +4683:riyal +4684:tilework +4685:sunshine +4686:shaggy +4687:ruffle +4688:obstruction +4689:carcass +4690:smartwatches +4691:opulence +4692:departure +4693:bolo +4694:hives +4695:badger +4696:success +4697:shoal +4698:boss +4699:hazards +4700:sonobuoyo +4701:polish +4702:avvala +4703:pittsburgh +4704:kayakers +4705:wildfire +4706:domain +4707:festivities +4708:animation +4709:immigration +4710:progress +4711:triangle +4712:waffle_iron +4713:watercraft +4714:blowing +4715:gouda +4716:emirates +4717:berlin +4718:traditional +4719:purchasing +4720:saudi +4721:bomb +4722:winners +4723:tactics +4724:amor +4725:flour +4726:wordpress +4727:grow +4728:echo +4729:vote +4730:furnishings +4731:rigener +4732:page +4733:rower +4734:eclipse +4735:abbey +4736:dye +4737:zip +4738:quiz +4739:diagram +4740:chimpanzee +4741:f430 +4742:underground +4743:father +4744:garbanzo +4745:newlywed +4746:saturn +4747:terracotta +4748:hoover +4749:coronavirus +4750:excavation +4751:trio +4752:elements +4753:spain +4754:burman +4755:geese +4756:remodel +4757:collapse +4758:navy +4759:everton +4760:renasance +4761:testing +4762:elegant +4763:currencies +4764:medallion +4765:menara +4766:font +4767:liner +4768:lattice +4769:tabasco +4770:viewer +4771:spelling +4772:continent +4773:charge +4774:attendees 
+4775:velodrome +4776:finlandia +4777:arrowhead +4778:grits +4779:heliift +4780:tee +4781:diving +4782:daffodil,daffodils +4783:eos +4784:sprout +4785:avenue +4786:kaaba +4787:fallen +4788:encryption +4789:beatles +4790:plantagen +4791:switzerland +4792:giant +4793:volksbank +4794:glitter +4795:puncher +4796:eat +4797:tesco +4798:virginia +4799:todaiji +4800:muellengo +4801:longines +4802:soundlink +4803:aroma +4804:high +4805:depiction +4806:pay +4807:bbc +4808:education +4809:law +4810:overlook,overlooks +4811:tutu +4812:oia +4813:granola +4814:vale +4815:aerial +4816:haze +4817:bangs +4818:burial +4819:bluebells +4820:greyhound +4821:norway +4822:colgate +4823:guides +4824:diversity +4825:- up +4826:vane +4827:stonex +4828:montevideo +4829:wattle +4830:gembong +4831:opinions +4832:voice +4833:sesame +4834:hanjin +4835:finland +4836:tropic +4837:responsibility +4838:colossion +4839:expenses +4840:auckland +4841:whisk +4842:sake +4843:athleticism +4844:recliner +4845:lincoln +4846:planner +4847:parangriti +4848:conservation +4849:rothenburg +4850:dwarfs +4851:political +4852:papa +4853:a rose +4854:hot +4855:vapor +4856:phuket +4857:beaver +4858:kingdom +4859:cuckoo +4860:rafting +4861:clap +4862:walk - in +4863:pulp +4864:archers +4865:broadcast +4866:frappuccino +4867:server +4868:motorrad +4869:composition +4870:gel +4871:bunnings +4872:haunting +4873:heroes +4874:share +4875:apparel +4876:conditioning +4877:explosion +4878:mustang +4879:colone +4880:aviation +4881:value +4882:shadowy +4883:nots +4884:collector +4885:england +4886:clutter +4887:gag +4888:itza +4889:commission +4890:betting +4891:nasa +4892:chassis +4893:login +4894:savings +4895:punt +4896:rani +4897:cellist +4898:cacti +4899:commitment +4900:paddleboat +4901:henna +4902:topping +4903:kathmandu +4904:hijabs +4905:rubik +4906:smart +4907:korea +4908:netting +4909:oracle +4910:strength +4911:carve +4912:fendi +4913:hudson +4914:fairway +4915:icicles +4916:firenze +4917:supporter +4918:wherever +4919:groceries +4920:aldi +4921:taipei +4922:freshness +4923:kind +4924:congo +4925:mulberry +4926:cambridge +4927:pacific +4928:recreation +4929:scouts +4930:mouthwash +4931:streetlamp +4932:marshmallow +4933:california +4934:reebok +4935:halfords +4936:signature +4937:oxford +4938:abu +4939:spark,sparks +4940:boi +4941:flow +4942:barley +4943:showhome +4944:artikel +4945:fleur +4946:purolator +4947:spicy +4948:length +4949:poignant +4950:dart +4951:milbone +4952:sunbeam +4953:strip +4954:kors +4955:missoni +4956:geyser +4957:poolside +4958:look +4959:sheeting +4960:watches +4961:mantellassi +4962:guaraná +4963:brandy +4964:malfunction +4965:grapevines +4966:pope +4967:indo +4968:wildflower +4969:remains +4970:hieroglyphs +4971:canilava +4972:species +4973:trump +4974:manor +4975:handprint +4976:trailhead +4977:hopscotch +4978:david +4979:singing +4980:shanghai +4981:mangoes +4982:hue,hues +4983:segovia +4984:stryker +4985:mrt +4986:fin +4987:newborn +4988:seater +4989:maersk +4990:thunder +4991:cbc +4992:anthologies +4993:rockefeller +4994:epic +4995:sealand +4996:stewart +4997:recreational +4998:asics +4999:wipes +5000:cleveland +5001:chance +5002:moai +5003:efficient +5004:title +5005:beet +5006:renaissance +5007:maison +5008:leak +5009:lira +5010:responders +5011:indomaret +5012:headline +5013:datar +5014:promotion +5015:mark,marks +5016:talkie +5017:lagerfeld +5018:popularity +5019:scorpion +5020:Paragliders,paragliders +5021:trout +5022:kintex +5023:lumia +5024:e - plus +5025:learning +5026:bnp +5027:management 
+5028:türkenstraße +5029:bain +5030:ukulele +5031:crepe +5032:embankment +5033:allexpress +5034:meizu +5035:m6s +5036:sandbox +5037:francisco +5038:loading +5039:launch +5040:initiative +5041:huntsman +5042:resurfacing +5043:handshaking +5044:swatch +5045:iff +5046:blackpool +5047:overcoat +5048:shutterstock +5049:taman +5050:descriptions +5051:gondoliers +5052:wafer +5053:rabbi +5054:hyper +5055:damp +5056:l'oreal +5057:okra +5058:Superheroes +5059:unfold +5060:tequila +5061:celo +5062:milan +5063:sculptor +5064:sprayer +5065:carp +5066:maneuvers +5067:smartlab +5068:blog +5069:breguet +5070:ace +5071:corona +5072:alldays +5073:kashmiri +5074:entranceway +5075:yes +5076:role +5077:kuwait +5078:styling +5079:Watsons +5080:dermalow +5081:illusion +5082:bulgaria +5083:delicate +5084:peppa +5085:speech +5086:saree +5087:gmc +5088:nook +5089:basil +5090:dishsoap +5091:amd +5092:ashes +5093:fishermen +5094:mississippi +5095:magnificent +5096:tipper +5097:furnace +5098:confederate +5099:neck +5100:airbus +5101:concerns +5102:porter +5103:butterflies +5104:briefs +5105:azadi +5106:nfl +5107:steamboat +5108:sector +5109:quesadilla +5110:weaving +5111:muscat +5112:canvas +5113:notification +5114:par +5115:minion +5116:factories +5117:crunchy +5118:barbershops +5119:infield +5120:collectibles +5121:outcropping +5122:courtroom +5123:canary +5124:walrus +5125:wework +5126:donald +5127:stallhofen +5128:crock +5129:gap +5130:focus +5131:software +5132:mardi +5133:der +5134:tata +5135:tik +5136:chuck +5137:crumble +5138:coliseum +5139:sunburst +5140:swastika +5141:gods +5142:kellogg +5143:vet +5144:deezer +5145:rochelinho +5146:shining +5147:kneeling +5148:bolo_tie +5149:minh +5150:huggies +5151:dawn +5152:release +5153:jousting +5154:housing +5155:accessories +5156:cheeseburger +5157:feeder +5158:latte +5159:cartier +5160:nintendo +5161:hardback +5162:petroglyphs +5163:marine +5164:elegance +5165:glassblowing +5166:c +5167:sawdust +5168:brexit +5169:azz +5170:vista +5171:cob +5172:depo +5173:past +5174:des +5175:moneygram +5176:protection +5177:pinball +5178:paycheck +5179:crevice +5180:neymar +5181:putt +5182:washing +5183:alipay +5184:friendship +5185:cordless +5186:diy +5187:communist +5188:daimler +5189:handicrafts +5190:teleferic +5191:totoro +5192:eps +5193:trends +5194:mercadona +5195:clubhouse +5196:discussion +5197:bonobo +5198:azalea +5199:lycee +5200:currant +5201:gopro +5202:silk +5203:format +5204:expression +5205:gala +5206:versatility +5207:horseshoe +5208:divider +5209:enforcement +5210:pickett +5211:costco +5212:stalactites +5213:assange +5214:alcatraz +5215:valor +5216:autodesk +5217:macau +5218:barracks +5219:emporio +5220:registration +5221:philadelphia +5222:bottlega +5223:cockatoo +5224:bravery +5225:citroen +5226:wisma +5227:guinea +5228:biking +5229:firemen +5230:mobis +5231:nesco +5232:lavandes +5233:riding +5234:puma +5235:stretches +5236:batteries +5237:dazs +5238:francis +5239:swordfish +5240:oculus +5241:bottom +5242:reserve +5243:ho +5244:account +5245:deltata +5246:beige +5247:coles +5248:cravings +5249:sonic +5250:davidson +5251:dryers +5252:rescuer diff --git a/mask_adapter/data/datasets/load_sem_seg.py b/mask_adapter/data/datasets/load_sem_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..10967b27b74cd7ab94766053bb349077e62059b8 --- /dev/null +++ b/mask_adapter/data/datasets/load_sem_seg.py @@ -0,0 +1,90 @@ + + +import contextlib +import datetime +import os +import logging +import numpy as np +from PIL import Image + +from 
detectron2.utils.file_io import PathManager
+
+
+
+logger = logging.getLogger(__name__)
+
+def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg", meta=None):
+    """
+    Load semantic segmentation datasets. All files under "gt_root" with "gt_ext" extension are
+    treated as ground truth annotations and all files under "image_root" with "image_ext" extension
+    as input images. Ground truth and input images are matched using file paths relative to
+    "gt_root" and "image_root" respectively, without taking into account file extensions.
+    This works for COCO as well as some other datasets.
+
+    Args:
+        gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation
+            annotations are stored as images with integer values in pixels that represent
+            corresponding semantic labels.
+        image_root (str): the directory where the input images are.
+        gt_ext (str): file extension for ground truth annotations.
+        image_ext (str): file extension for input images.
+
+    Returns:
+        list[dict]:
+            a list of dicts in detectron2 standard format without instance-level
+            annotation.
+
+    Notes:
+        1. This function does not read the image and ground truth files.
+           The results do not have the "image" and "sem_seg" fields.
+    """
+
+    # We match input images with ground truth based on their relative filepaths (without file
+    # extensions) starting from 'image_root' and 'gt_root' respectively.
+    def file2id(folder_path, file_path):
+        # extract relative path starting from `folder_path`
+        image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path))
+        # remove file extension
+        image_id = os.path.splitext(image_id)[0]
+        return image_id
+
+    input_files = sorted(
+        (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)),
+        key=lambda file_path: file2id(image_root, file_path),
+    )
+    gt_files = sorted(
+        (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)),
+        key=lambda file_path: file2id(gt_root, file_path),
+    )
+
+    assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root)
+
+    # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images
+    if len(input_files) != len(gt_files):
+        logger.warning(
+            "Directories {} and {} have {} and {} files, respectively.".format(
+                image_root, gt_root, len(input_files), len(gt_files)
+            )
+        )
+        input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files]
+        gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files]
+        intersect = list(set(input_basenames) & set(gt_basenames))
+        # sort, otherwise each worker may obtain a list[dict] in different order
+        intersect = sorted(intersect)
+        logger.warning("Will use their intersection of {} files.".format(len(intersect)))
+        input_files = [os.path.join(image_root, f + image_ext) for f in intersect]
+        gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect]
+
+    logger.info(
+        "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root)
+    )
+
+    dataset_dicts = []
+    for (img_path, gt_path) in zip(input_files, gt_files):
+        record = {}
+        record["file_name"] = img_path
+        record["sem_seg_file_name"] = gt_path
+        record["meta"] = meta
+        dataset_dicts.append(record)
+
+    return dataset_dicts
\ No newline at end of file
diff --git a/mask_adapter/data/datasets/lvis_1203_with_prompt_eng.txt b/mask_adapter/data/datasets/lvis_1203_with_prompt_eng.txt
new file mode 100644
index 0000000000000000000000000000000000000000..db80237b5fffcff3b91b6f917f518f14103bff3a
---
/dev/null +++ b/mask_adapter/data/datasets/lvis_1203_with_prompt_eng.txt @@ -0,0 +1,1203 @@ +1:aerosol can,spray can +2:air conditioner +3:airplane,aeroplane +4:alarm clock +5:alcohol,alcoholic beverage +6:alligator,gator +7:almond +8:ambulance +9:amplifier +10:anklet,ankle bracelet +11:antenna,aerial,transmitting aerial +12:apple +13:applesauce +14:apricot +15:apron +16:aquarium,fish tank +17:arctic (type of shoe),galosh,golosh,rubber (type of shoe),gumshoe +18:armband +19:armchair +20:armoire +21:armor,armour +22:artichoke +23:trash can,garbage can,wastebin,dustbin,trash barrel,trash bin +24:ashtray +25:asparagus +26:atomizer,atomiser,spray,sprayer,nebulizer,nebuliser +27:avocado +28:award,accolade +29:awning +30:ax,axe +31:baboon +32:baby buggy,baby carriage,perambulator,pram,stroller +33:basketball backboard +34:backpack,knapsack,packsack,rucksack,haversack +35:handbag,purse,pocketbook +36:suitcase,baggage,luggage +37:bagel,beigel +38:bagpipe +39:baguet,baguette +40:bait,lure +41:ball +42:ballet skirt,tutu +43:balloon +44:bamboo +45:banana +46:Band Aid +47:bandage +48:bandanna,bandana +49:banjo +50:banner,streamer +51:barbell +52:barge +53:barrel,cask +54:barrette +55:barrow,garden cart,lawn cart,wheelbarrow +56:baseball base +57:baseball +58:baseball bat +59:baseball cap,jockey cap,golf cap +60:baseball glove,baseball mitt +61:basket,handbasket +62:basketball +63:bass horn,sousaphone,tuba +64:bat (animal) +65:bath mat +66:bath towel +67:bathrobe +68:bathtub,bathing tub +69:batter (food) +70:battery +71:beachball +72:bead +73:bean curd,tofu +74:beanbag +75:beanie,beany +76:bear +77:bed +78:bedpan +79:bedspread,bedcover,bed covering,counterpane,spread +80:cow +81:beef (food),boeuf (food) +82:beeper,pager +83:beer bottle +84:beer can +85:beetle +86:bell +87:bell pepper,capsicum +88:belt +89:belt buckle +90:bench +91:beret +92:bib +93:Bible +94:bicycle,bike (bicycle) +95:visor,vizor +96:billboard +97:binder,ring-binder +98:binoculars,field glasses,opera glasses +99:bird +100:birdfeeder +101:birdbath +102:birdcage +103:birdhouse +104:birthday cake +105:birthday card +106:pirate flag +107:black sheep +108:blackberry +109:blackboard,chalkboard +110:blanket +111:blazer,sport jacket,sport coat,sports jacket,sports coat +112:blender,liquidizer,liquidiser +113:blimp +114:blinker,flasher +115:blouse +116:blueberry +117:gameboard +118:boat,ship (boat) +119:bob,bobber,bobfloat +120:bobbin,spool,reel +121:bobby pin,hairgrip +122:boiled egg,coddled egg +123:bolo tie,bolo,bola tie,bola +124:deadbolt +125:bolt +126:bonnet +127:book +128:bookcase +129:booklet,brochure,leaflet,pamphlet +130:bookmark,bookmarker +131:boom microphone,microphone boom +132:boot +133:bottle +134:bottle opener +135:bouquet +136:bow (weapon) +137:bow (decorative ribbons) +138:bow-tie,bowtie +139:bowl +140:pipe bowl +141:bowler hat,bowler,derby hat,derby,plug hat +142:bowling ball +143:box +144:boxing glove +145:suspenders +146:bracelet,bangle +147:brass plaque +148:brassiere,bra,bandeau +149:bread-bin,breadbox +150:bread +151:breechcloth,breechclout,loincloth +152:bridal gown,wedding gown,wedding dress +153:briefcase +154:broccoli +155:broach +156:broom +157:brownie +158:brussels sprouts +159:bubble gum +160:bucket,pail +161:horse buggy +162:horned cow +163:bulldog +164:bulldozer,dozer +165:bullet train +166:bulletin board,notice board +167:bulletproof vest +168:bullhorn,megaphone +169:bun,roll +170:bunk bed +171:buoy +172:burrito +173:bus (vehicle),autobus,charabanc,double-decker,motorbus,motorcoach +174:business card 
+175:butter +176:butterfly +177:button +178:cab (taxi),taxi,taxicab +179:cabana +180:cabin car,caboose +181:cabinet +182:locker,storage locker +183:cake +184:calculator +185:calendar +186:calf +187:camcorder +188:camel +189:camera +190:camera lens +191:camper (vehicle),camping bus,motor home +192:can,tin can +193:can opener,tin opener +194:candle,candlestick +195:candle holder +196:candy bar +197:candy cane +198:walking cane +199:canister,cannister +200:canoe +201:cantaloup,cantaloupe +202:canteen +203:cap (headwear) +204:bottle cap,cap (container lid) +205:cape +206:cappuccino,coffee cappuccino +207:car (automobile),auto (automobile),automobile +208:railcar (part of a train),railway car (part of a train),railroad car (part of a train) +209:elevator car +210:car battery,automobile battery +211:identity card +212:card +213:cardigan +214:cargo ship,cargo vessel +215:carnation +216:horse carriage +217:carrot +218:tote bag +219:cart +220:carton +221:cash register,register (for cash transactions) +222:casserole +223:cassette +224:cast,plaster cast,plaster bandage +225:cat +226:cauliflower +227:cayenne (spice),cayenne pepper (spice),red pepper (spice) +228:CD player +229:celery +230:cellular telephone,cellular phone,cellphone,mobile phone,smart phone +231:chain mail,ring mail,chain armor,chain armour,ring armor,ring armour +232:chair +233:chaise longue,chaise,daybed +234:chalice +235:chandelier +236:chap +237:checkbook,chequebook +238:checkerboard +239:cherry +240:chessboard +241:chicken (animal) +242:chickpea,garbanzo +243:chili (vegetable),chili pepper (vegetable),chilli (vegetable),chilly (vegetable),chile (vegetable) +244:chime,gong +245:chinaware +246:crisp (potato chip),potato chip +247:poker chip +248:chocolate bar +249:chocolate cake +250:chocolate milk +251:chocolate mousse +252:choker,collar,neckband +253:chopping board,cutting board,chopping block +254:chopstick +255:Christmas tree +256:slide +257:cider,cyder +258:cigar box +259:cigarette +260:cigarette case,cigarette pack +261:cistern,water tank +262:clarinet +263:clasp +264:cleansing agent,cleanser,cleaner +265:cleat (for securing rope) +266:clementine +267:clip +268:clipboard +269:clippers (for plants) +270:cloak +271:clock,timepiece,timekeeper +272:clock tower +273:clothes hamper,laundry basket,clothes basket +274:clothespin,clothes peg +275:clutch bag +276:coaster +277:coat +278:coat hanger,clothes hanger,dress hanger +279:coatrack,hatrack +280:cock,rooster +281:cockroach +282:cocoa (beverage),hot chocolate (beverage),drinking chocolate +283:coconut,cocoanut +284:coffee maker,coffee machine +285:coffee table,cocktail table +286:coffeepot +287:coil +288:coin +289:colander,cullender +290:coleslaw,slaw +291:coloring material,colouring material +292:combination lock +293:pacifier,teething ring +294:comic book +295:compass +296:computer keyboard,keyboard (computer) +297:condiment +298:cone,traffic cone +299:control,controller +300:convertible (automobile) +301:sofa bed +302:cooker +303:cookie,cooky,biscuit (cookie) +304:cooking utensil +305:cooler (for food),ice chest +306:cork (bottle plug),bottle cork +307:corkboard +308:corkscrew,bottle screw +309:edible corn,corn,maize +310:cornbread +311:cornet,horn,trumpet +312:cornice,valance,valance board,pelmet +313:cornmeal +314:corset,girdle +315:costume +316:cougar,puma,catamount,mountain lion,panther +317:coverall +318:cowbell +319:cowboy hat,ten-gallon hat +320:crab (animal) +321:crabmeat +322:cracker +323:crape,crepe,French pancake +324:crate +325:crayon,wax crayon +326:cream pitcher 
+327:crescent roll,croissant +328:crib,cot +329:crock pot,earthenware jar +330:crossbar +331:crouton +332:crow +333:crowbar,wrecking bar,pry bar +334:crown +335:crucifix +336:cruise ship,cruise liner +337:police cruiser,patrol car,police car,squad car +338:crumb +339:crutch +340:cub (animal) +341:cube,square block +342:cucumber,cuke +343:cufflink +344:cup +345:trophy cup +346:cupboard,closet +347:cupcake +348:hair curler,hair roller,hair crimper +349:curling iron +350:curtain,drapery +351:cushion +352:cylinder +353:cymbal +354:dagger +355:dalmatian +356:dartboard +357:date (fruit) +358:deck chair,beach chair +359:deer,cervid +360:dental floss,floss +361:desk +362:detergent +363:diaper +364:diary,journal +365:die,dice +366:dinghy,dory,rowboat +367:dining table +368:tux,tuxedo +369:dish +370:dish antenna +371:dishrag,dishcloth +372:dishtowel,tea towel +373:dishwasher,dishwashing machine +374:dishwasher detergent,dishwashing detergent,dishwashing liquid,dishsoap +375:dispenser +376:diving board +377:Dixie cup,paper cup +378:dog +379:dog collar +380:doll +381:dollar,dollar bill,one dollar bill +382:dollhouse,doll's house +383:dolphin +384:domestic ass,donkey +385:doorknob,doorhandle +386:doormat,welcome mat +387:doughnut,donut +388:dove +389:dragonfly +390:drawer +391:underdrawers,boxers,boxershorts +392:dress,frock +393:dress hat,high hat,opera hat,silk hat,top hat +394:dress suit +395:dresser +396:drill +397:drone +398:dropper,eye dropper +399:drum (musical instrument) +400:drumstick +401:duck +402:duckling +403:duct tape +404:duffel bag,duffle bag,duffel,duffle +405:dumbbell +406:dumpster +407:dustpan +408:eagle +409:earphone,earpiece,headphone +410:earplug +411:earring +412:easel +413:eclair +414:eel +415:egg,eggs +416:egg roll,spring roll +417:egg yolk,yolk (egg) +418:eggbeater,eggwhisk +419:eggplant,aubergine +420:electric chair +421:refrigerator +422:elephant +423:elk,moose +424:envelope +425:eraser +426:escargot +427:eyepatch +428:falcon +429:fan +430:faucet,spigot,tap +431:fedora +432:ferret +433:Ferris wheel +434:ferry,ferryboat +435:fig (fruit) +436:fighter jet,fighter aircraft,attack aircraft +437:figurine +438:file cabinet,filing cabinet +439:file (tool) +440:fire alarm,smoke alarm +441:fire engine,fire truck +442:fire extinguisher,extinguisher +443:fire hose +444:fireplace +445:fireplug,fire hydrant,hydrant +446:first-aid kit +447:fish +448:fish (food) +449:fishbowl,goldfish bowl +450:fishing rod,fishing pole +451:flag +452:flagpole,flagstaff +453:flamingo +454:flannel +455:flap +456:flash,flashbulb +457:flashlight,torch +458:fleece +459:flip-flop (sandal) +460:flipper (footwear),fin (footwear) +461:flower arrangement,floral arrangement +462:flute glass,champagne flute +463:foal +464:folding chair +465:food processor +466:football (American) +467:football helmet +468:footstool,footrest +469:fork +470:forklift +471:freight car +472:French toast +473:freshener,air freshener +474:frisbee +475:frog,toad,toad frog +476:fruit juice +477:frying pan,frypan,skillet +478:fudge +479:funnel +480:futon +481:gag,muzzle +482:garbage +483:garbage truck +484:garden hose +485:gargle,mouthwash +486:gargoyle +487:garlic,ail +488:gasmask,respirator,gas helmet +489:gazelle +490:gelatin,jelly +491:gemstone +492:generator +493:giant panda,panda,panda bear +494:gift wrap +495:ginger,gingerroot +496:giraffe +497:cincture,sash,waistband,waistcloth +498:glass (drink container),drinking glass +499:globe +500:glove +501:goat +502:goggles +503:goldfish +504:golf club,golf-club +505:golfcart +506:gondola (boat) 
+507:goose +508:gorilla +509:gourd +510:grape +511:grater +512:gravestone,headstone,tombstone +513:gravy boat,gravy holder +514:green bean +515:green onion,spring onion,scallion +516:griddle +517:grill,grille,grillwork,radiator grille +518:grits,hominy grits +519:grizzly,grizzly bear +520:grocery bag +521:guitar +522:gull,seagull +523:gun +524:hairbrush +525:hairnet +526:hairpin +527:halter top +528:ham,jambon,gammon +529:hamburger,beefburger,burger +530:hammer +531:hammock +532:hamper +533:hamster +534:hair dryer +535:hand glass,hand mirror +536:hand towel,face towel +537:handcart,pushcart,hand truck +538:handcuff +539:handkerchief +540:handle,grip,handgrip +541:handsaw,carpenter's saw +542:hardback book,hardcover book +543:harmonium,organ (musical instrument),reed organ (musical instrument) +544:hat +545:hatbox +546:veil +547:headband +548:headboard +549:headlight,headlamp +550:headscarf +551:headset +552:headstall (for horses),headpiece (for horses) +553:heart +554:heater,warmer +555:helicopter +556:helmet +557:heron +558:highchair,feeding chair +559:hinge +560:hippopotamus +561:hockey stick +562:hog,pig +563:home plate (baseball),home base (baseball) +564:honey +565:fume hood,exhaust hood +566:hook +567:hookah,narghile,nargileh,sheesha,shisha,water pipe +568:hornet +569:horse +570:hose,hosepipe +571:hot-air balloon +572:hotplate +573:hot sauce +574:hourglass +575:houseboat +576:hummingbird +577:hummus,humus,hommos,hoummos,humous +578:polar bear +579:icecream +580:popsicle +581:ice maker +582:ice pack,ice bag +583:ice skate +584:igniter,ignitor,lighter +585:inhaler,inhalator +586:iPod +587:iron (for clothing),smoothing iron (for clothing) +588:ironing board +589:jacket +590:jam +591:jar +592:jean,blue jean,denim +593:jeep,landrover +594:jelly bean,jelly egg +595:jersey,T-shirt,tee shirt +596:jet plane,jet-propelled plane +597:jewel,gem,precious stone +598:jewelry,jewellery +599:joystick +600:jumpsuit +601:kayak +602:keg +603:kennel,doghouse +604:kettle,boiler +605:key +606:keycard +607:kilt +608:kimono +609:kitchen sink +610:kitchen table +611:kite +612:kitten,kitty +613:kiwi fruit +614:knee pad +615:knife +616:knitting needle +617:knob +618:knocker (on a door),doorknocker +619:koala,koala bear +620:lab coat,laboratory coat +621:ladder +622:ladle +623:ladybug,ladybeetle,ladybird beetle +624:lamb (animal) +625:lamb-chop,lambchop +626:lamp +627:lamppost +628:lampshade +629:lantern +630:lanyard,laniard +631:laptop computer,notebook computer +632:lasagna,lasagne +633:latch +634:lawn mower +635:leather +636:legging (clothing),leging (clothing),leg covering +637:Lego,Lego set +638:legume +639:lemon +640:lemonade +641:lettuce +642:license plate,numberplate +643:life buoy,lifesaver,life belt,life ring +644:life jacket,life vest +645:lightbulb +646:lightning rod,lightning conductor +647:lime +648:limousine +649:lion +650:lip balm +651:liquor,spirits,hard liquor,liqueur,cordial +652:lizard +653:log +654:lollipop +655:speaker (stero equipment) +656:loveseat +657:machine gun +658:magazine +659:magnet +660:mail slot +661:mailbox (at home),letter box (at home) +662:mallard +663:mallet +664:mammoth +665:manatee +666:mandarin orange +667:manger,trough +668:manhole +669:map +670:marker +671:martini +672:mascot +673:mashed potato +674:masher +675:mask,facemask +676:mast +677:mat (gym equipment),gym mat +678:matchbox +679:mattress +680:measuring cup +681:measuring stick,ruler (measuring stick),measuring rod +682:meatball +683:medicine +684:melon +685:microphone +686:microscope +687:microwave oven 
+688:milestone,milepost +689:milk +690:milk can +691:milkshake +692:minivan +693:mint candy +694:mirror +695:mitten +696:mixer (kitchen tool),stand mixer +697:money +698:monitor (computer equipment) computer monitor +699:monkey +700:motor +701:motor scooter,scooter +702:motor vehicle,automotive vehicle +703:motorcycle +704:mound (baseball),pitcher's mound +705:mouse (computer equipment),computer mouse +706:mousepad +707:muffin +708:mug +709:mushroom +710:music stool,piano stool +711:musical instrument,instrument (musical) +712:nailfile +713:napkin,table napkin,serviette +714:neckerchief +715:necklace +716:necktie,tie (necktie) +717:needle +718:nest +719:newspaper,paper (newspaper) +720:newsstand +721:nightshirt,nightwear,sleepwear,nightclothes +722:nosebag (for animals),feedbag +723:noseband (for animals),nosepiece (for animals) +724:notebook +725:notepad +726:nut +727:nutcracker +728:oar +729:octopus (food) +730:octopus (animal) +731:oil lamp,kerosene lamp,kerosine lamp +732:olive oil +733:omelet,omelette +734:onion +735:orange (fruit) +736:orange juice +737:ostrich +738:ottoman,pouf,pouffe,hassock +739:oven +740:overalls (clothing) +741:owl +742:packet +743:inkpad,inking pad,stamp pad +744:pad +745:paddle,boat paddle +746:padlock +747:paintbrush +748:painting +749:pajamas,pyjamas +750:palette,pallet +751:pan (for cooking),cooking pan +752:pan (metal container) +753:pancake +754:pantyhose +755:papaya +756:paper plate +757:paper towel +758:paperback book,paper-back book,softback book,soft-cover book +759:paperweight +760:parachute +761:parakeet,parrakeet,parroket,paraquet,paroquet,parroquet +762:parasail (sports) +763:parasol,sunshade +764:parchment +765:parka,anorak +766:parking meter +767:parrot +768:passenger car (part of a train),coach (part of a train) +769:passenger ship +770:passport +771:pastry +772:patty (food) +773:pea (food) +774:peach +775:peanut butter +776:pear +777:peeler (tool for fruit and vegetables) +778:wooden leg,pegleg +779:pegboard +780:pelican +781:pen +782:pencil +783:pencil box,pencil case +784:pencil sharpener +785:pendulum +786:penguin +787:pennant +788:penny (coin) +789:pepper,peppercorn +790:pepper mill,pepper grinder +791:perfume +792:persimmon +793:person,baby,child,boy,girl,man,woman,human +794:pet +795:pew (church bench),church bench +796:phonebook,telephone book,telephone directory +797:phonograph record,phonograph recording,record (phonograph recording) +798:piano +799:pickle +800:pickup truck +801:pie +802:pigeon +803:piggy bank,penny bank +804:pillow +805:pin (non jewelry) +806:pineapple +807:pinecone +808:ping-pong ball +809:pinwheel +810:tobacco pipe +811:pipe,piping +812:pistol,handgun +813:pita (bread),pocket bread +814:pitcher (vessel for liquid),ewer +815:pitchfork +816:pizza +817:place mat +818:plate +819:platter +820:playpen +821:pliers,plyers +822:plow (farm equipment),plough (farm equipment) +823:plume +824:pocket watch +825:pocketknife +826:poker (fire stirring tool),stove poker,fire hook +827:pole,post +828:polo shirt,sport shirt +829:poncho +830:pony +831:pool table,billiard table,snooker table +832:pop (soda),soda (pop),tonic,soft drink +833:postbox (public),mailbox (public) +834:postcard,postal card,mailing-card +835:poster,placard +836:pot +837:flowerpot +838:potato +839:potholder +840:pottery,clayware +841:pouch +842:power shovel,excavator,digger +843:prawn,shrimp +844:pretzel +845:printer,printing machine +846:projectile (weapon),missile +847:projector +848:propeller,propellor +849:prune +850:pudding +851:puffer 
(fish),pufferfish,blowfish,globefish +852:puffin +853:pug-dog +854:pumpkin +855:puncher +856:puppet,marionette +857:puppy +858:quesadilla +859:quiche +860:quilt,comforter +861:rabbit +862:race car,racing car +863:racket,racquet +864:radar +865:radiator +866:radio receiver,radio set,radio,tuner (radio) +867:radish,daikon +868:raft +869:rag doll +870:raincoat,waterproof jacket +871:ram (animal) +872:raspberry +873:rat +874:razorblade +875:reamer (juicer),juicer,juice reamer +876:rearview mirror +877:receipt +878:recliner,reclining chair,lounger (chair) +879:record player,phonograph (record player),turntable +880:reflector +881:remote control +882:rhinoceros +883:rib (food) +884:rifle +885:ring +886:river boat +887:road map +888:robe +889:rocking chair +890:rodent +891:roller skate +892:Rollerblade +893:rolling pin +894:root beer +895:router (computer equipment) +896:rubber band,elastic band +897:runner (carpet) +898:plastic bag,paper bag +899:saddle (on an animal) +900:saddle blanket,saddlecloth,horse blanket +901:saddlebag +902:safety pin +903:sail +904:salad +905:salad plate,salad bowl +906:salami +907:salmon (fish) +908:salmon (food) +909:salsa +910:saltshaker +911:sandal (type of shoe) +912:sandwich +913:satchel +914:saucepan +915:saucer +916:sausage +917:sawhorse,sawbuck +918:saxophone +919:scale (measuring instrument) +920:scarecrow,strawman +921:scarf +922:school bus +923:scissors +924:scoreboard +925:scraper +926:screwdriver +927:scrubbing brush +928:sculpture +929:seabird,seafowl +930:seahorse +931:seaplane,hydroplane +932:seashell +933:sewing machine +934:shaker +935:shampoo +936:shark +937:sharpener +938:Sharpie +939:shaver (electric),electric shaver,electric razor +940:shaving cream,shaving soap +941:shawl +942:shears +943:sheep +944:shepherd dog,sheepdog +945:sherbert,sherbet +946:shield +947:shirt +948:shoe,sneaker (type of shoe),tennis shoe +949:shopping bag +950:shopping cart +951:short pants,shorts (clothing),trunks (clothing) +952:shot glass +953:shoulder bag +954:shovel +955:shower head +956:shower cap +957:shower curtain +958:shredder (for paper) +959:signboard +960:silo +961:sink +962:skateboard +963:skewer +964:ski +965:ski boot +966:ski parka,ski jacket +967:ski pole +968:skirt +969:skullcap +970:sled,sledge,sleigh +971:sleeping bag +972:sling (bandage),triangular bandage +973:slipper (footwear),carpet slipper (footwear) +974:smoothie +975:snake,serpent +976:snowboard +977:snowman +978:snowmobile +979:soap +980:soccer ball +981:sock +982:sofa,couch,lounge +983:softball +984:solar array,solar battery,solar panel +985:sombrero +986:soup +987:soup bowl +988:soupspoon +989:sour cream,soured cream +990:soya milk,soybean milk,soymilk +991:space shuttle +992:sparkler (fireworks) +993:spatula +994:spear,lance +995:spectacles,specs,eyeglasses,glasses +996:spice rack +997:spider +998:crawfish,crayfish +999:sponge +1000:spoon +1001:sportswear,athletic wear,activewear +1002:spotlight +1003:squid (food),calamari,calamary +1004:squirrel +1005:stagecoach +1006:stapler (stapling machine) +1007:starfish,sea star +1008:statue (sculpture) +1009:steak (food) +1010:steak knife +1011:steering wheel +1012:stepladder +1013:step stool +1014:stereo (sound system) +1015:stew +1016:stirrer +1017:stirrup +1018:stool +1019:stop sign +1020:brake light +1021:stove,kitchen stove,range (kitchen appliance),kitchen range,cooking stove +1022:strainer +1023:strap +1024:straw (for drinking),drinking straw +1025:strawberry +1026:street sign +1027:streetlight,street lamp +1028:string cheese +1029:stylus 
+1030:subwoofer +1031:sugar bowl +1032:sugarcane (plant) +1033:suit (clothing) +1034:sunflower +1035:sunglasses +1036:sunhat +1037:surfboard +1038:sushi +1039:mop +1040:sweat pants +1041:sweatband +1042:sweater +1043:sweatshirt +1044:sweet potato +1045:swimsuit,swimwear,bathing suit,swimming costume,bathing costume,swimming trunks,bathing trunks +1046:sword +1047:syringe +1048:Tabasco sauce +1049:table-tennis table,ping-pong table +1050:table +1051:table lamp +1052:tablecloth +1053:tachometer +1054:taco +1055:tag +1056:taillight,rear light +1057:tambourine +1058:army tank,armored combat vehicle,armoured combat vehicle +1059:tank (storage vessel),storage tank +1060:tank top (clothing) +1061:tape (sticky cloth or paper) +1062:tape measure,measuring tape +1063:tapestry +1064:tarp +1065:tartan,plaid +1066:tassel +1067:tea bag +1068:teacup +1069:teakettle +1070:teapot +1071:teddy bear +1072:telephone,phone,telephone set +1073:telephone booth,phone booth,call box,telephone box,telephone kiosk +1074:telephone pole,telegraph pole,telegraph post +1075:telephoto lens,zoom lens +1076:television camera,tv camera +1077:television set,tv,tv set +1078:tennis ball +1079:tennis racket +1080:tequila +1081:thermometer +1082:thermos bottle +1083:thermostat +1084:thimble +1085:thread,yarn +1086:thumbtack,drawing pin,pushpin +1087:tiara +1088:tiger +1089:tights (clothing),leotards +1090:timer,stopwatch +1091:tinfoil +1092:tinsel +1093:tissue paper +1094:toast (food) +1095:toaster +1096:toaster oven +1097:toilet +1098:toilet tissue,toilet paper,bathroom tissue +1099:tomato +1100:tongs +1101:toolbox +1102:toothbrush +1103:toothpaste +1104:toothpick +1105:cover +1106:tortilla +1107:tow truck +1108:towel +1109:towel rack,towel rail,towel bar +1110:toy +1111:tractor (farm equipment) +1112:traffic light +1113:dirt bike +1114:trailer truck,tractor trailer,trucking rig,articulated lorry,semi truck +1115:train (railroad vehicle),railroad train +1116:trampoline +1117:tray +1118:trench coat +1119:triangle (musical instrument) +1120:tricycle +1121:tripod +1122:trousers,pants (clothing) +1123:truck +1124:truffle (chocolate),chocolate truffle +1125:trunk +1126:vat +1127:turban +1128:turkey (food) +1129:turnip +1130:turtle +1131:turtleneck (clothing),polo-neck +1132:typewriter +1133:umbrella +1134:underwear,underclothes,underclothing,underpants +1135:unicycle +1136:urinal +1137:urn +1138:vacuum cleaner +1139:vase +1140:vending machine +1141:vent,blowhole,air vent +1142:vest,waistcoat +1143:videotape +1144:vinegar +1145:violin,fiddle +1146:vodka +1147:volleyball +1148:vulture +1149:waffle +1150:waffle iron +1151:wagon +1152:wagon wheel +1153:walking stick +1154:wall clock +1155:wall socket,wall plug,electric outlet,electrical outlet,outlet,electric receptacle +1156:wallet,billfold +1157:walrus +1158:wardrobe +1159:washbasin,basin (for washing),washbowl,washstand,handbasin +1160:automatic washer,washing machine +1161:watch,wristwatch +1162:water bottle +1163:water cooler +1164:water faucet,water tap,tap (water faucet) +1165:water heater,hot-water heater +1166:water jug +1167:water gun,squirt gun +1168:water scooter,sea scooter,jet ski +1169:water ski +1170:water tower +1171:watering can +1172:watermelon +1173:weathervane,vane (weathervane),wind vane +1174:webcam +1175:wedding cake,bridecake +1176:wedding ring,wedding band +1177:wet suit +1178:wheel +1179:wheelchair +1180:whipped cream +1181:whistle +1182:wig +1183:wind chime +1184:windmill +1185:window box (for plants) +1186:windshield wiper,windscreen wiper,wiper (for 
windshield/screen) +1187:windsock,air sock,air-sleeve,wind sleeve,wind cone +1188:wine bottle +1189:wine bucket,wine cooler +1190:wineglass +1191:blinder (for horses) +1192:wok +1193:wolf +1194:wooden spoon +1195:wreath +1196:wrench,spanner +1197:wristband +1198:wristlet,wrist band +1199:yacht +1200:yogurt,yoghurt,yoghourt +1201:yoke (animal equipment) +1202:zebra +1203:zucchini,courgette \ No newline at end of file diff --git a/mask_adapter/data/datasets/mapillary_vistas_with_prompt_eng.txt b/mask_adapter/data/datasets/mapillary_vistas_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..d8d5ba17ac1bbddef0605d039d012cc873118870 --- /dev/null +++ b/mask_adapter/data/datasets/mapillary_vistas_with_prompt_eng.txt @@ -0,0 +1,66 @@ +0:invalid_class_id +1:bird,birds +2:ground animal,ground animate being,dog,cat,horse,cow,sheep,zebra,giraffe +3:curb,curbs +4:fence,fences +5:guard rail +6:barrier +7:wall,walls,brick wall,stone wall,interior wall +8:bike Lane +9:crosswalk +10:curb cut +11:parking +12:pedestrian area +13:rail track +14:road +15:service lane +16:sidewalk,pavement +17:bridge +18:building,buildings +19:tunnel +20:person,child,girl,boy,woman,man,people,children,girls,boys,women,men,lady,guy,ladies,guys,clothes +21:bicyclist,bicyclists +22:motorcyclist,motorcyclists +23:other rider,rider +24:lane marking of crosswalk +25:lane marking +26:mountain,mountains +27:sand +28:sky,clouds +29:snow +30:terrain,river,sea,grass,dirt,rock +31:vegetation,tree,trees,palm tree,bushes +32:water +33:banner,streamer +34:bench,benches +35:bike rack +36:billboard,hoarding +37:catch basin +38:cctv camera,cctv +39:fire hydrant,fireplug,plug +40:junction box +41:mailbox,postbox,mailbox,letter box +42:manhole +43:phone booth,telephone booth,call box,telephone box,telephone kiosk +44:pothole +45:street light +46:pole +47:traffic sign frame +48:utility pole +49:traffic light,traffic signal,traffic lights +50:traffic sign (back),back of traffic sign,traffic sign back +51:traffic sign (front),front of traffic sign,traffic sign front +52:trash can,ashcan,garbage can,wastebin,ash bin,ash-bin,ashbin,dustbin,trash barrel,trash bin +53:bicycle,bike +54:boat +55:bus,autobus,double-decker,jitney,motorbus,motorcoach,omnibus,passenger vehicle +56:car,automobile,cars +57:caravan +58:motorcycle,motorcycles +59:on rails +60:other vehicle,vehicle +61:trailer +62:truck,motortruck +63:wheeled slow +64:car mount +65:ego vehicle \ No newline at end of file diff --git a/mask_adapter/data/datasets/openseg_classes.py b/mask_adapter/data/datasets/openseg_classes.py new file mode 100644 index 0000000000000000000000000000000000000000..dbc58b086e62e3929de9ded6c6f79d2c3cdb848c --- /dev/null +++ b/mask_adapter/data/datasets/openseg_classes.py @@ -0,0 +1,2492 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import copy +COCO_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, + {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, + {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, + {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, + {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, + {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, + {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, + {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, + {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, + {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, + {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, + {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, + {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, + {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, + {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, + {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, + {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, + {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, + {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, + {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, + {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, + {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, + {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, + {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, + {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, + {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, + {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, + {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, + {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, + {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, + {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, + {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, + {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, + {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, + {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, + {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, + {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, + {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, + {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, + {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, + {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, + {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, + {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, + {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, + {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, + {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, + {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, + {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, + {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, + {"color": [255, 109, 65], "isthing": 1, 
"id": 55, "name": "orange"}, + {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, + {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, + {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, + {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, + {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, + {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, + {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, + {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, + {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, + {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, + {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, + {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, + {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, + {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, + {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, + {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, + {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, + {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, + {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, + {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, + {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, + {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, + {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, + {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, + {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, + {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, + {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, + {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, + {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, + {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, + {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, + {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, + {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, + {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"}, + {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, + {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, + {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, + {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, + {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, + {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, + {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, + {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, + {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, + {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, + {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, + {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, + {"color": [255, 180, 195], "isthing": 0, "id": 144, "name": "platform"}, + {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, + {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, 
+ {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, + {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, + {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, + {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, + {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, + {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, + {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, + {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, + {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, + {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, + {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, + {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, + {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, + {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, + {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, + {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, + {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, + {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, + {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, + {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, + {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, + {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, + {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, + {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, + {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, + {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, + {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, + {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, + {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, + {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, + {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, + {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"}, + {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, + {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, +] + +ADE20K_150_CATEGORIES = [ + {"color": [120, 120, 120], "id": 0, "isthing": 0, "name": "wall"}, + {"color": [180, 120, 120], "id": 1, "isthing": 0, "name": "building"}, + {"color": [6, 230, 230], "id": 2, "isthing": 0, "name": "sky"}, + {"color": [80, 50, 50], "id": 3, "isthing": 0, "name": "floor"}, + {"color": [4, 200, 3], "id": 4, "isthing": 0, "name": "tree"}, + {"color": [120, 120, 80], "id": 5, "isthing": 0, "name": "ceiling"}, + {"color": [140, 140, 140], "id": 6, "isthing": 0, "name": "road, route"}, + {"color": [204, 5, 255], "id": 7, "isthing": 1, "name": "bed"}, + {"color": [230, 230, 230], "id": 8, "isthing": 1, "name": "window "}, + {"color": [4, 250, 7], "id": 9, "isthing": 0, "name": "grass"}, + {"color": [224, 5, 255], "id": 10, "isthing": 1, "name": "cabinet"}, + {"color": [235, 255, 7], "id": 11, "isthing": 0, "name": "sidewalk, pavement"}, + {"color": [150, 5, 61], "id": 12, "isthing": 1, "name": "person"}, + 
{"color": [120, 120, 70], "id": 13, "isthing": 0, "name": "earth, ground"}, + {"color": [8, 255, 51], "id": 14, "isthing": 1, "name": "door"}, + {"color": [255, 6, 82], "id": 15, "isthing": 1, "name": "table"}, + {"color": [143, 255, 140], "id": 16, "isthing": 0, "name": "mountain, mount"}, + {"color": [204, 255, 4], "id": 17, "isthing": 0, "name": "plant"}, + {"color": [255, 51, 7], "id": 18, "isthing": 1, "name": "curtain"}, + {"color": [204, 70, 3], "id": 19, "isthing": 1, "name": "chair"}, + {"color": [0, 102, 200], "id": 20, "isthing": 1, "name": "car"}, + {"color": [61, 230, 250], "id": 21, "isthing": 0, "name": "water"}, + {"color": [255, 6, 51], "id": 22, "isthing": 1, "name": "painting, picture"}, + {"color": [11, 102, 255], "id": 23, "isthing": 1, "name": "sofa"}, + {"color": [255, 7, 71], "id": 24, "isthing": 1, "name": "shelf"}, + {"color": [255, 9, 224], "id": 25, "isthing": 0, "name": "house"}, + {"color": [9, 7, 230], "id": 26, "isthing": 0, "name": "sea"}, + {"color": [220, 220, 220], "id": 27, "isthing": 1, "name": "mirror"}, + {"color": [255, 9, 92], "id": 28, "isthing": 0, "name": "rug"}, + {"color": [112, 9, 255], "id": 29, "isthing": 0, "name": "field"}, + {"color": [8, 255, 214], "id": 30, "isthing": 1, "name": "armchair"}, + {"color": [7, 255, 224], "id": 31, "isthing": 1, "name": "seat"}, + {"color": [255, 184, 6], "id": 32, "isthing": 1, "name": "fence"}, + {"color": [10, 255, 71], "id": 33, "isthing": 1, "name": "desk"}, + {"color": [255, 41, 10], "id": 34, "isthing": 0, "name": "rock, stone"}, + {"color": [7, 255, 255], "id": 35, "isthing": 1, "name": "wardrobe, closet, press"}, + {"color": [224, 255, 8], "id": 36, "isthing": 1, "name": "lamp"}, + {"color": [102, 8, 255], "id": 37, "isthing": 1, "name": "tub"}, + {"color": [255, 61, 6], "id": 38, "isthing": 1, "name": "rail"}, + {"color": [255, 194, 7], "id": 39, "isthing": 1, "name": "cushion"}, + {"color": [255, 122, 8], "id": 40, "isthing": 0, "name": "base, pedestal, stand"}, + {"color": [0, 255, 20], "id": 41, "isthing": 1, "name": "box"}, + {"color": [255, 8, 41], "id": 42, "isthing": 1, "name": "column, pillar"}, + {"color": [255, 5, 153], "id": 43, "isthing": 1, "name": "signboard, sign"}, + { + "color": [6, 51, 255], + "id": 44, + "isthing": 1, + "name": "chest of drawers, chest, bureau, dresser", + }, + {"color": [235, 12, 255], "id": 45, "isthing": 1, "name": "counter"}, + {"color": [160, 150, 20], "id": 46, "isthing": 0, "name": "sand"}, + {"color": [0, 163, 255], "id": 47, "isthing": 1, "name": "sink"}, + {"color": [140, 140, 140], "id": 48, "isthing": 0, "name": "skyscraper"}, + {"color": [250, 10, 15], "id": 49, "isthing": 1, "name": "fireplace"}, + {"color": [20, 255, 0], "id": 50, "isthing": 1, "name": "refrigerator, icebox"}, + {"color": [31, 255, 0], "id": 51, "isthing": 0, "name": "grandstand, covered stand"}, + {"color": [255, 31, 0], "id": 52, "isthing": 0, "name": "path"}, + {"color": [255, 224, 0], "id": 53, "isthing": 1, "name": "stairs"}, + {"color": [153, 255, 0], "id": 54, "isthing": 0, "name": "runway"}, + {"color": [0, 0, 255], "id": 55, "isthing": 1, "name": "case, display case, showcase, vitrine"}, + { + "color": [255, 71, 0], + "id": 56, + "isthing": 1, + "name": "pool table, billiard table, snooker table", + }, + {"color": [0, 235, 255], "id": 57, "isthing": 1, "name": "pillow"}, + {"color": [0, 173, 255], "id": 58, "isthing": 1, "name": "screen door, screen"}, + {"color": [31, 0, 255], "id": 59, "isthing": 0, "name": "stairway, staircase"}, + {"color": [11, 200, 200], "id": 60, 
"isthing": 0, "name": "river"}, + {"color": [255, 82, 0], "id": 61, "isthing": 0, "name": "bridge, span"}, + {"color": [0, 255, 245], "id": 62, "isthing": 1, "name": "bookcase"}, + {"color": [0, 61, 255], "id": 63, "isthing": 0, "name": "blind, screen"}, + {"color": [0, 255, 112], "id": 64, "isthing": 1, "name": "coffee table"}, + { + "color": [0, 255, 133], + "id": 65, + "isthing": 1, + "name": "toilet, can, commode, crapper, pot, potty, stool, throne", + }, + {"color": [255, 0, 0], "id": 66, "isthing": 1, "name": "flower"}, + {"color": [255, 163, 0], "id": 67, "isthing": 1, "name": "book"}, + {"color": [255, 102, 0], "id": 68, "isthing": 0, "name": "hill"}, + {"color": [194, 255, 0], "id": 69, "isthing": 1, "name": "bench"}, + {"color": [0, 143, 255], "id": 70, "isthing": 1, "name": "countertop"}, + {"color": [51, 255, 0], "id": 71, "isthing": 1, "name": "stove"}, + {"color": [0, 82, 255], "id": 72, "isthing": 1, "name": "palm, palm tree"}, + {"color": [0, 255, 41], "id": 73, "isthing": 1, "name": "kitchen island"}, + {"color": [0, 255, 173], "id": 74, "isthing": 1, "name": "computer"}, + {"color": [10, 0, 255], "id": 75, "isthing": 1, "name": "swivel chair"}, + {"color": [173, 255, 0], "id": 76, "isthing": 1, "name": "boat"}, + {"color": [0, 255, 153], "id": 77, "isthing": 0, "name": "bar"}, + {"color": [255, 92, 0], "id": 78, "isthing": 1, "name": "arcade machine"}, + {"color": [255, 0, 255], "id": 79, "isthing": 0, "name": "hovel, hut, hutch, shack, shanty"}, + {"color": [255, 0, 245], "id": 80, "isthing": 1, "name": "bus"}, + {"color": [255, 0, 102], "id": 81, "isthing": 1, "name": "towel"}, + {"color": [255, 173, 0], "id": 82, "isthing": 1, "name": "light"}, + {"color": [255, 0, 20], "id": 83, "isthing": 1, "name": "truck"}, + {"color": [255, 184, 184], "id": 84, "isthing": 0, "name": "tower"}, + {"color": [0, 31, 255], "id": 85, "isthing": 1, "name": "chandelier"}, + {"color": [0, 255, 61], "id": 86, "isthing": 1, "name": "awning, sunshade, sunblind"}, + {"color": [0, 71, 255], "id": 87, "isthing": 1, "name": "street lamp"}, + {"color": [255, 0, 204], "id": 88, "isthing": 1, "name": "booth"}, + {"color": [0, 255, 194], "id": 89, "isthing": 1, "name": "tv"}, + {"color": [0, 255, 82], "id": 90, "isthing": 1, "name": "plane"}, + {"color": [0, 10, 255], "id": 91, "isthing": 0, "name": "dirt track"}, + {"color": [0, 112, 255], "id": 92, "isthing": 1, "name": "clothes"}, + {"color": [51, 0, 255], "id": 93, "isthing": 1, "name": "pole"}, + {"color": [0, 194, 255], "id": 94, "isthing": 0, "name": "land, ground, soil"}, + { + "color": [0, 122, 255], + "id": 95, + "isthing": 1, + "name": "bannister, banister, balustrade, balusters, handrail", + }, + { + "color": [0, 255, 163], + "id": 96, + "isthing": 0, + "name": "escalator, moving staircase, moving stairway", + }, + { + "color": [255, 153, 0], + "id": 97, + "isthing": 1, + "name": "ottoman, pouf, pouffe, puff, hassock", + }, + {"color": [0, 255, 10], "id": 98, "isthing": 1, "name": "bottle"}, + {"color": [255, 112, 0], "id": 99, "isthing": 0, "name": "buffet, counter, sideboard"}, + { + "color": [143, 255, 0], + "id": 100, + "isthing": 0, + "name": "poster, posting, placard, notice, bill, card", + }, + {"color": [82, 0, 255], "id": 101, "isthing": 0, "name": "stage"}, + {"color": [163, 255, 0], "id": 102, "isthing": 1, "name": "van"}, + {"color": [255, 235, 0], "id": 103, "isthing": 1, "name": "ship"}, + {"color": [8, 184, 170], "id": 104, "isthing": 1, "name": "fountain"}, + { + "color": [133, 0, 255], + "id": 105, + "isthing": 0, + 
"name": "conveyer belt, conveyor belt, conveyer, conveyor, transporter", + }, + {"color": [0, 255, 92], "id": 106, "isthing": 0, "name": "canopy"}, + { + "color": [184, 0, 255], + "id": 107, + "isthing": 1, + "name": "washer, automatic washer, washing machine", + }, + {"color": [255, 0, 31], "id": 108, "isthing": 1, "name": "plaything, toy"}, + {"color": [0, 184, 255], "id": 109, "isthing": 0, "name": "pool"}, + {"color": [0, 214, 255], "id": 110, "isthing": 1, "name": "stool"}, + {"color": [255, 0, 112], "id": 111, "isthing": 1, "name": "barrel, cask"}, + {"color": [92, 255, 0], "id": 112, "isthing": 1, "name": "basket, handbasket"}, + {"color": [0, 224, 255], "id": 113, "isthing": 0, "name": "falls"}, + {"color": [112, 224, 255], "id": 114, "isthing": 0, "name": "tent"}, + {"color": [70, 184, 160], "id": 115, "isthing": 1, "name": "bag"}, + {"color": [163, 0, 255], "id": 116, "isthing": 1, "name": "minibike, motorbike"}, + {"color": [153, 0, 255], "id": 117, "isthing": 0, "name": "cradle"}, + {"color": [71, 255, 0], "id": 118, "isthing": 1, "name": "oven"}, + {"color": [255, 0, 163], "id": 119, "isthing": 1, "name": "ball"}, + {"color": [255, 204, 0], "id": 120, "isthing": 1, "name": "food, solid food"}, + {"color": [255, 0, 143], "id": 121, "isthing": 1, "name": "step, stair"}, + {"color": [0, 255, 235], "id": 122, "isthing": 0, "name": "tank, storage tank"}, + {"color": [133, 255, 0], "id": 123, "isthing": 1, "name": "trade name"}, + {"color": [255, 0, 235], "id": 124, "isthing": 1, "name": "microwave"}, + {"color": [245, 0, 255], "id": 125, "isthing": 1, "name": "pot"}, + {"color": [255, 0, 122], "id": 126, "isthing": 1, "name": "animal"}, + {"color": [255, 245, 0], "id": 127, "isthing": 1, "name": "bicycle"}, + {"color": [10, 190, 212], "id": 128, "isthing": 0, "name": "lake"}, + {"color": [214, 255, 0], "id": 129, "isthing": 1, "name": "dishwasher"}, + {"color": [0, 204, 255], "id": 130, "isthing": 1, "name": "screen"}, + {"color": [20, 0, 255], "id": 131, "isthing": 0, "name": "blanket, cover"}, + {"color": [255, 255, 0], "id": 132, "isthing": 1, "name": "sculpture"}, + {"color": [0, 153, 255], "id": 133, "isthing": 1, "name": "hood, exhaust hood"}, + {"color": [0, 41, 255], "id": 134, "isthing": 1, "name": "sconce"}, + {"color": [0, 255, 204], "id": 135, "isthing": 1, "name": "vase"}, + {"color": [41, 0, 255], "id": 136, "isthing": 1, "name": "traffic light"}, + {"color": [41, 255, 0], "id": 137, "isthing": 1, "name": "tray"}, + {"color": [173, 0, 255], "id": 138, "isthing": 1, "name": "trash can"}, + {"color": [0, 245, 255], "id": 139, "isthing": 1, "name": "fan"}, + {"color": [71, 0, 255], "id": 140, "isthing": 0, "name": "pier"}, + {"color": [122, 0, 255], "id": 141, "isthing": 0, "name": "crt screen"}, + {"color": [0, 255, 184], "id": 142, "isthing": 1, "name": "plate"}, + {"color": [0, 92, 255], "id": 143, "isthing": 1, "name": "monitor"}, + {"color": [184, 255, 0], "id": 144, "isthing": 1, "name": "bulletin board"}, + {"color": [0, 133, 255], "id": 145, "isthing": 0, "name": "shower"}, + {"color": [255, 214, 0], "id": 146, "isthing": 1, "name": "radiator"}, + {"color": [25, 194, 194], "id": 147, "isthing": 1, "name": "glass, drinking glass"}, + {"color": [102, 255, 0], "id": 148, "isthing": 1, "name": "clock"}, + {"color": [92, 0, 255], "id": 149, "isthing": 1, "name": "flag"}, +] + +CITYSCAPES_CATEGORIES = [ + {"color": (128, 64, 128), "isthing": 0, "id": 7, "trainId": 0, "name": "road"}, + {"color": (244, 35, 232), "isthing": 0, "id": 8, "trainId": 1, "name": "sidewalk"}, 
+ {"color": (70, 70, 70), "isthing": 0, "id": 11, "trainId": 2, "name": "building"}, + {"color": (102, 102, 156), "isthing": 0, "id": 12, "trainId": 3, "name": "wall"}, + {"color": (190, 153, 153), "isthing": 0, "id": 13, "trainId": 4, "name": "fence"}, + {"color": (153, 153, 153), "isthing": 0, "id": 17, "trainId": 5, "name": "pole"}, + {"color": (250, 170, 30), "isthing": 0, "id": 19, "trainId": 6, "name": "traffic light"}, + {"color": (220, 220, 0), "isthing": 0, "id": 20, "trainId": 7, "name": "traffic sign"}, + {"color": (107, 142, 35), "isthing": 0, "id": 21, "trainId": 8, "name": "vegetation"}, + {"color": (152, 251, 152), "isthing": 0, "id": 22, "trainId": 9, "name": "terrain"}, + {"color": (70, 130, 180), "isthing": 0, "id": 23, "trainId": 10, "name": "sky"}, + {"color": (220, 20, 60), "isthing": 1, "id": 24, "trainId": 11, "name": "person"}, + {"color": (255, 0, 0), "isthing": 1, "id": 25, "trainId": 12, "name": "rider"}, + {"color": (0, 0, 142), "isthing": 1, "id": 26, "trainId": 13, "name": "car"}, + {"color": (0, 0, 70), "isthing": 1, "id": 27, "trainId": 14, "name": "truck"}, + {"color": (0, 60, 100), "isthing": 1, "id": 28, "trainId": 15, "name": "bus"}, + {"color": (0, 80, 100), "isthing": 1, "id": 31, "trainId": 16, "name": "train"}, + {"color": (0, 0, 230), "isthing": 1, "id": 32, "trainId": 17, "name": "motorcycle"}, + {"color": (119, 11, 32), "isthing": 1, "id": 33, "trainId": 18, "name": "bicycle"}, +] + +ADE20K_847_CATEGORIES = [ + {"name": "wall", "id": 2978, "trainId": 0}, + {"name": "building, edifice", "id": 312, "trainId": 1}, + {"name": "sky", "id": 2420, "trainId": 2}, + {"name": "tree", "id": 2855, "trainId": 3}, + {"name": "road, route", "id": 2131, "trainId": 4}, + {"name": "floor, flooring", "id": 976, "trainId": 5}, + {"name": "ceiling", "id": 447, "trainId": 6}, + {"name": "bed", "id": 165, "trainId": 7}, + {"name": "sidewalk, pavement", "id": 2377, "trainId": 8}, + {"name": "earth, ground", "id": 838, "trainId": 9}, + {"name": "cabinet", "id": 350, "trainId": 10}, + {"name": "person, individual, someone, somebody, mortal, soul", "id": 1831, "trainId": 11}, + {"name": "grass", "id": 1125, "trainId": 12}, + {"name": "windowpane, window", "id": 3055, "trainId": 13}, + {"name": "car, auto, automobile, machine, motorcar", "id": 401, "trainId": 14}, + {"name": "mountain, mount", "id": 1610, "trainId": 15}, + {"name": "plant, flora, plant life", "id": 1910, "trainId": 16}, + {"name": "table", "id": 2684, "trainId": 17}, + {"name": "chair", "id": 471, "trainId": 18}, + {"name": "curtain, drape, drapery, mantle, pall", "id": 687, "trainId": 19}, + {"name": "door", "id": 774, "trainId": 20}, + {"name": "sofa, couch, lounge", "id": 2473, "trainId": 21}, + {"name": "sea", "id": 2264, "trainId": 22}, + {"name": "painting, picture", "id": 1735, "trainId": 23}, + {"name": "water", "id": 2994, "trainId": 24}, + {"name": "mirror", "id": 1564, "trainId": 25}, + {"name": "house", "id": 1276, "trainId": 26}, + {"name": "rug, carpet, carpeting", "id": 2178, "trainId": 27}, + {"name": "shelf", "id": 2329, "trainId": 28}, + {"name": "armchair", "id": 57, "trainId": 29}, + {"name": "fence, fencing", "id": 907, "trainId": 30}, + {"name": "field", "id": 913, "trainId": 31}, + {"name": "lamp", "id": 1395, "trainId": 32}, + {"name": "rock, stone", "id": 2138, "trainId": 33}, + {"name": "seat", "id": 2272, "trainId": 34}, + {"name": "river", "id": 2128, "trainId": 35}, + {"name": "desk", "id": 724, "trainId": 36}, + {"name": "bathtub, bathing tub, bath, tub", "id": 155, "trainId": 
37}, + {"name": "railing, rail", "id": 2053, "trainId": 38}, + {"name": "signboard, sign", "id": 2380, "trainId": 39}, + {"name": "cushion", "id": 689, "trainId": 40}, + {"name": "path", "id": 1788, "trainId": 41}, + {"name": "work surface", "id": 3087, "trainId": 42}, + {"name": "stairs, steps", "id": 2530, "trainId": 43}, + {"name": "column, pillar", "id": 581, "trainId": 44}, + {"name": "sink", "id": 2388, "trainId": 45}, + {"name": "wardrobe, closet, press", "id": 2985, "trainId": 46}, + {"name": "snow", "id": 2454, "trainId": 47}, + {"name": "refrigerator, icebox", "id": 2096, "trainId": 48}, + {"name": "base, pedestal, stand", "id": 137, "trainId": 49}, + {"name": "bridge, span", "id": 294, "trainId": 50}, + {"name": "blind, screen", "id": 212, "trainId": 51}, + {"name": "runway", "id": 2185, "trainId": 52}, + {"name": "cliff, drop, drop-off", "id": 524, "trainId": 53}, + {"name": "sand", "id": 2212, "trainId": 54}, + {"name": "fireplace, hearth, open fireplace", "id": 943, "trainId": 55}, + {"name": "pillow", "id": 1869, "trainId": 56}, + {"name": "screen door, screen", "id": 2251, "trainId": 57}, + {"name": "toilet, can, commode, crapper, pot, potty, stool, throne", "id": 2793, "trainId": 58}, + {"name": "skyscraper", "id": 2423, "trainId": 59}, + {"name": "grandstand, covered stand", "id": 1121, "trainId": 60}, + {"name": "box", "id": 266, "trainId": 61}, + {"name": "pool table, billiard table, snooker table", "id": 1948, "trainId": 62}, + {"name": "palm, palm tree", "id": 1744, "trainId": 63}, + {"name": "double door", "id": 783, "trainId": 64}, + {"name": "coffee table, cocktail table", "id": 571, "trainId": 65}, + {"name": "counter", "id": 627, "trainId": 66}, + {"name": "countertop", "id": 629, "trainId": 67}, + {"name": "chest of drawers, chest, bureau, dresser", "id": 491, "trainId": 68}, + {"name": "kitchen island", "id": 1374, "trainId": 69}, + {"name": "boat", "id": 223, "trainId": 70}, + {"name": "waterfall, falls", "id": 3016, "trainId": 71}, + { + "name": "stove, kitchen stove, range, kitchen range, cooking stove", + "id": 2598, + "trainId": 72, + }, + {"name": "flower", "id": 978, "trainId": 73}, + {"name": "bookcase", "id": 239, "trainId": 74}, + {"name": "controls", "id": 608, "trainId": 75}, + {"name": "book", "id": 236, "trainId": 76}, + {"name": "stairway, staircase", "id": 2531, "trainId": 77}, + {"name": "streetlight, street lamp", "id": 2616, "trainId": 78}, + { + "name": "computer, computing machine, computing device, data processor, electronic computer, information processing system", + "id": 591, + "trainId": 79, + }, + { + "name": "bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle", + "id": 327, + "trainId": 80, + }, + {"name": "swivel chair", "id": 2679, "trainId": 81}, + {"name": "light, light source", "id": 1451, "trainId": 82}, + {"name": "bench", "id": 181, "trainId": 83}, + {"name": "case, display case, showcase, vitrine", "id": 420, "trainId": 84}, + {"name": "towel", "id": 2821, "trainId": 85}, + {"name": "fountain", "id": 1023, "trainId": 86}, + {"name": "embankment", "id": 855, "trainId": 87}, + { + "name": "television receiver, television, television set, tv, tv set, idiot box, boob tube, telly, goggle box", + "id": 2733, + "trainId": 88, + }, + {"name": "van", "id": 2928, "trainId": 89}, + {"name": "hill", "id": 1240, "trainId": 90}, + {"name": "awning, sunshade, sunblind", "id": 77, "trainId": 91}, + {"name": "poster, posting, placard, notice, bill, card", "id": 1969, "trainId": 92}, + 
{"name": "truck, motortruck", "id": 2880, "trainId": 93}, + {"name": "airplane, aeroplane, plane", "id": 14, "trainId": 94}, + {"name": "pole", "id": 1936, "trainId": 95}, + {"name": "tower", "id": 2828, "trainId": 96}, + {"name": "court", "id": 631, "trainId": 97}, + {"name": "ball", "id": 103, "trainId": 98}, + { + "name": "aircraft carrier, carrier, flattop, attack aircraft carrier", + "id": 3144, + "trainId": 99, + }, + {"name": "buffet, counter, sideboard", "id": 308, "trainId": 100}, + {"name": "hovel, hut, hutch, shack, shanty", "id": 1282, "trainId": 101}, + {"name": "apparel, wearing apparel, dress, clothes", "id": 38, "trainId": 102}, + {"name": "minibike, motorbike", "id": 1563, "trainId": 103}, + {"name": "animal, animate being, beast, brute, creature, fauna", "id": 29, "trainId": 104}, + {"name": "chandelier, pendant, pendent", "id": 480, "trainId": 105}, + {"name": "step, stair", "id": 2569, "trainId": 106}, + {"name": "booth, cubicle, stall, kiosk", "id": 247, "trainId": 107}, + {"name": "bicycle, bike, wheel, cycle", "id": 187, "trainId": 108}, + {"name": "doorframe, doorcase", "id": 778, "trainId": 109}, + {"name": "sconce", "id": 2243, "trainId": 110}, + {"name": "pond", "id": 1941, "trainId": 111}, + {"name": "trade name, brand name, brand, marque", "id": 2833, "trainId": 112}, + {"name": "bannister, banister, balustrade, balusters, handrail", "id": 120, "trainId": 113}, + {"name": "bag", "id": 95, "trainId": 114}, + {"name": "traffic light, traffic signal, stoplight", "id": 2836, "trainId": 115}, + {"name": "gazebo", "id": 1087, "trainId": 116}, + {"name": "escalator, moving staircase, moving stairway", "id": 868, "trainId": 117}, + {"name": "land, ground, soil", "id": 1401, "trainId": 118}, + {"name": "board, plank", "id": 220, "trainId": 119}, + {"name": "arcade machine", "id": 47, "trainId": 120}, + {"name": "eiderdown, duvet, continental quilt", "id": 843, "trainId": 121}, + {"name": "bar", "id": 123, "trainId": 122}, + {"name": "stall, stand, sales booth", "id": 2537, "trainId": 123}, + {"name": "playground", "id": 1927, "trainId": 124}, + {"name": "ship", "id": 2337, "trainId": 125}, + {"name": "ottoman, pouf, pouffe, puff, hassock", "id": 1702, "trainId": 126}, + { + "name": "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin", + "id": 64, + "trainId": 127, + }, + {"name": "bottle", "id": 249, "trainId": 128}, + {"name": "cradle", "id": 642, "trainId": 129}, + {"name": "pot, flowerpot", "id": 1981, "trainId": 130}, + { + "name": "conveyer belt, conveyor belt, conveyer, conveyor, transporter", + "id": 609, + "trainId": 131, + }, + {"name": "train, railroad train", "id": 2840, "trainId": 132}, + {"name": "stool", "id": 2586, "trainId": 133}, + {"name": "lake", "id": 1393, "trainId": 134}, + {"name": "tank, storage tank", "id": 2704, "trainId": 135}, + {"name": "ice, water ice", "id": 1304, "trainId": 136}, + {"name": "basket, handbasket", "id": 146, "trainId": 137}, + {"name": "manhole", "id": 1494, "trainId": 138}, + {"name": "tent, collapsible shelter", "id": 2739, "trainId": 139}, + {"name": "canopy", "id": 389, "trainId": 140}, + {"name": "microwave, microwave oven", "id": 1551, "trainId": 141}, + {"name": "barrel, cask", "id": 131, "trainId": 142}, + {"name": "dirt track", "id": 738, "trainId": 143}, + {"name": "beam", "id": 161, "trainId": 144}, + {"name": "dishwasher, dish washer, dishwashing machine", "id": 747, "trainId": 145}, + {"name": "plate", "id": 1919, "trainId": 146}, + {"name": "screen, crt 
screen", "id": 3109, "trainId": 147}, + {"name": "ruins", "id": 2179, "trainId": 148}, + {"name": "washer, automatic washer, washing machine", "id": 2989, "trainId": 149}, + {"name": "blanket, cover", "id": 206, "trainId": 150}, + {"name": "plaything, toy", "id": 1930, "trainId": 151}, + {"name": "food, solid food", "id": 1002, "trainId": 152}, + {"name": "screen, silver screen, projection screen", "id": 2254, "trainId": 153}, + {"name": "oven", "id": 1708, "trainId": 154}, + {"name": "stage", "id": 2526, "trainId": 155}, + {"name": "beacon, lighthouse, beacon light, pharos", "id": 160, "trainId": 156}, + {"name": "umbrella", "id": 2901, "trainId": 157}, + {"name": "sculpture", "id": 2262, "trainId": 158}, + {"name": "aqueduct", "id": 44, "trainId": 159}, + {"name": "container", "id": 597, "trainId": 160}, + {"name": "scaffolding, staging", "id": 2235, "trainId": 161}, + {"name": "hood, exhaust hood", "id": 1260, "trainId": 162}, + {"name": "curb, curbing, kerb", "id": 682, "trainId": 163}, + {"name": "roller coaster", "id": 2151, "trainId": 164}, + {"name": "horse, equus caballus", "id": 3107, "trainId": 165}, + {"name": "catwalk", "id": 432, "trainId": 166}, + {"name": "glass, drinking glass", "id": 1098, "trainId": 167}, + {"name": "vase", "id": 2932, "trainId": 168}, + {"name": "central reservation", "id": 461, "trainId": 169}, + {"name": "carousel", "id": 410, "trainId": 170}, + {"name": "radiator", "id": 2046, "trainId": 171}, + {"name": "closet", "id": 533, "trainId": 172}, + {"name": "machine", "id": 1481, "trainId": 173}, + {"name": "pier, wharf, wharfage, dock", "id": 1858, "trainId": 174}, + {"name": "fan", "id": 894, "trainId": 175}, + {"name": "inflatable bounce game", "id": 1322, "trainId": 176}, + {"name": "pitch", "id": 1891, "trainId": 177}, + {"name": "paper", "id": 1756, "trainId": 178}, + {"name": "arcade, colonnade", "id": 49, "trainId": 179}, + {"name": "hot tub", "id": 1272, "trainId": 180}, + {"name": "helicopter", "id": 1229, "trainId": 181}, + {"name": "tray", "id": 2850, "trainId": 182}, + {"name": "partition, divider", "id": 1784, "trainId": 183}, + {"name": "vineyard", "id": 2962, "trainId": 184}, + {"name": "bowl", "id": 259, "trainId": 185}, + {"name": "bullring", "id": 319, "trainId": 186}, + {"name": "flag", "id": 954, "trainId": 187}, + {"name": "pot", "id": 1974, "trainId": 188}, + {"name": "footbridge, overcrossing, pedestrian bridge", "id": 1013, "trainId": 189}, + {"name": "shower", "id": 2356, "trainId": 190}, + {"name": "bag, traveling bag, travelling bag, grip, suitcase", "id": 97, "trainId": 191}, + {"name": "bulletin board, notice board", "id": 318, "trainId": 192}, + {"name": "confessional booth", "id": 592, "trainId": 193}, + {"name": "trunk, tree trunk, bole", "id": 2885, "trainId": 194}, + {"name": "forest", "id": 1017, "trainId": 195}, + {"name": "elevator door", "id": 851, "trainId": 196}, + {"name": "laptop, laptop computer", "id": 1407, "trainId": 197}, + {"name": "instrument panel", "id": 1332, "trainId": 198}, + {"name": "bucket, pail", "id": 303, "trainId": 199}, + {"name": "tapestry, tapis", "id": 2714, "trainId": 200}, + {"name": "platform", "id": 1924, "trainId": 201}, + {"name": "jacket", "id": 1346, "trainId": 202}, + {"name": "gate", "id": 1081, "trainId": 203}, + {"name": "monitor, monitoring device", "id": 1583, "trainId": 204}, + { + "name": "telephone booth, phone booth, call box, telephone box, telephone kiosk", + "id": 2727, + "trainId": 205, + }, + {"name": "spotlight, spot", "id": 2509, "trainId": 206}, + {"name": "ring", 
"id": 2123, "trainId": 207}, + {"name": "control panel", "id": 602, "trainId": 208}, + {"name": "blackboard, chalkboard", "id": 202, "trainId": 209}, + {"name": "air conditioner, air conditioning", "id": 10, "trainId": 210}, + {"name": "chest", "id": 490, "trainId": 211}, + {"name": "clock", "id": 530, "trainId": 212}, + {"name": "sand dune", "id": 2213, "trainId": 213}, + {"name": "pipe, pipage, piping", "id": 1884, "trainId": 214}, + {"name": "vault", "id": 2934, "trainId": 215}, + {"name": "table football", "id": 2687, "trainId": 216}, + {"name": "cannon", "id": 387, "trainId": 217}, + {"name": "swimming pool, swimming bath, natatorium", "id": 2668, "trainId": 218}, + {"name": "fluorescent, fluorescent fixture", "id": 982, "trainId": 219}, + {"name": "statue", "id": 2547, "trainId": 220}, + { + "name": "loudspeaker, speaker, speaker unit, loudspeaker system, speaker system", + "id": 1474, + "trainId": 221, + }, + {"name": "exhibitor", "id": 877, "trainId": 222}, + {"name": "ladder", "id": 1391, "trainId": 223}, + {"name": "carport", "id": 414, "trainId": 224}, + {"name": "dam", "id": 698, "trainId": 225}, + {"name": "pulpit", "id": 2019, "trainId": 226}, + {"name": "skylight, fanlight", "id": 2422, "trainId": 227}, + {"name": "water tower", "id": 3010, "trainId": 228}, + {"name": "grill, grille, grillwork", "id": 1139, "trainId": 229}, + {"name": "display board", "id": 753, "trainId": 230}, + {"name": "pane, pane of glass, window glass", "id": 1747, "trainId": 231}, + {"name": "rubbish, trash, scrap", "id": 2175, "trainId": 232}, + {"name": "ice rink", "id": 1301, "trainId": 233}, + {"name": "fruit", "id": 1033, "trainId": 234}, + {"name": "patio", "id": 1789, "trainId": 235}, + {"name": "vending machine", "id": 2939, "trainId": 236}, + {"name": "telephone, phone, telephone set", "id": 2730, "trainId": 237}, + {"name": "net", "id": 1652, "trainId": 238}, + { + "name": "backpack, back pack, knapsack, packsack, rucksack, haversack", + "id": 90, + "trainId": 239, + }, + {"name": "jar", "id": 1349, "trainId": 240}, + {"name": "track", "id": 2830, "trainId": 241}, + {"name": "magazine", "id": 1485, "trainId": 242}, + {"name": "shutter", "id": 2370, "trainId": 243}, + {"name": "roof", "id": 2155, "trainId": 244}, + {"name": "banner, streamer", "id": 118, "trainId": 245}, + {"name": "landfill", "id": 1402, "trainId": 246}, + {"name": "post", "id": 1957, "trainId": 247}, + {"name": "altarpiece, reredos", "id": 3130, "trainId": 248}, + {"name": "hat, chapeau, lid", "id": 1197, "trainId": 249}, + {"name": "arch, archway", "id": 52, "trainId": 250}, + {"name": "table game", "id": 2688, "trainId": 251}, + {"name": "bag, handbag, pocketbook, purse", "id": 96, "trainId": 252}, + {"name": "document, written document, papers", "id": 762, "trainId": 253}, + {"name": "dome", "id": 772, "trainId": 254}, + {"name": "pier", "id": 1857, "trainId": 255}, + {"name": "shanties", "id": 2315, "trainId": 256}, + {"name": "forecourt", "id": 1016, "trainId": 257}, + {"name": "crane", "id": 643, "trainId": 258}, + {"name": "dog, domestic dog, canis familiaris", "id": 3105, "trainId": 259}, + {"name": "piano, pianoforte, forte-piano", "id": 1849, "trainId": 260}, + {"name": "drawing", "id": 791, "trainId": 261}, + {"name": "cabin", "id": 349, "trainId": 262}, + { + "name": "ad, advertisement, advertizement, advertising, advertizing, advert", + "id": 6, + "trainId": 263, + }, + {"name": "amphitheater, amphitheatre, coliseum", "id": 3114, "trainId": 264}, + {"name": "monument", "id": 1587, "trainId": 265}, + {"name": 
"henhouse", "id": 1233, "trainId": 266}, + {"name": "cockpit", "id": 559, "trainId": 267}, + {"name": "heater, warmer", "id": 1223, "trainId": 268}, + {"name": "windmill, aerogenerator, wind generator", "id": 3049, "trainId": 269}, + {"name": "pool", "id": 1943, "trainId": 270}, + {"name": "elevator, lift", "id": 853, "trainId": 271}, + {"name": "decoration, ornament, ornamentation", "id": 709, "trainId": 272}, + {"name": "labyrinth", "id": 1390, "trainId": 273}, + {"name": "text, textual matter", "id": 2748, "trainId": 274}, + {"name": "printer", "id": 2007, "trainId": 275}, + {"name": "mezzanine, first balcony", "id": 1546, "trainId": 276}, + {"name": "mattress", "id": 1513, "trainId": 277}, + {"name": "straw", "id": 2600, "trainId": 278}, + {"name": "stalls", "id": 2538, "trainId": 279}, + {"name": "patio, terrace", "id": 1790, "trainId": 280}, + {"name": "billboard, hoarding", "id": 194, "trainId": 281}, + {"name": "bus stop", "id": 326, "trainId": 282}, + {"name": "trouser, pant", "id": 2877, "trainId": 283}, + {"name": "console table, console", "id": 594, "trainId": 284}, + {"name": "rack", "id": 2036, "trainId": 285}, + {"name": "notebook", "id": 1662, "trainId": 286}, + {"name": "shrine", "id": 2366, "trainId": 287}, + {"name": "pantry", "id": 1754, "trainId": 288}, + {"name": "cart", "id": 418, "trainId": 289}, + {"name": "steam shovel", "id": 2553, "trainId": 290}, + {"name": "porch", "id": 1951, "trainId": 291}, + {"name": "postbox, mailbox, letter box", "id": 1963, "trainId": 292}, + {"name": "figurine, statuette", "id": 918, "trainId": 293}, + {"name": "recycling bin", "id": 2086, "trainId": 294}, + {"name": "folding screen", "id": 997, "trainId": 295}, + {"name": "telescope", "id": 2731, "trainId": 296}, + {"name": "deck chair, beach chair", "id": 704, "trainId": 297}, + {"name": "kennel", "id": 1365, "trainId": 298}, + {"name": "coffee maker", "id": 569, "trainId": 299}, + {"name": "altar, communion table, lord's table", "id": 3108, "trainId": 300}, + {"name": "fish", "id": 948, "trainId": 301}, + {"name": "easel", "id": 839, "trainId": 302}, + {"name": "artificial golf green", "id": 63, "trainId": 303}, + {"name": "iceberg", "id": 1305, "trainId": 304}, + {"name": "candlestick, candle holder", "id": 378, "trainId": 305}, + {"name": "shower stall, shower bath", "id": 2362, "trainId": 306}, + {"name": "television stand", "id": 2734, "trainId": 307}, + { + "name": "wall socket, wall plug, electric outlet, electrical outlet, outlet, electric receptacle", + "id": 2982, + "trainId": 308, + }, + {"name": "skeleton", "id": 2398, "trainId": 309}, + {"name": "grand piano, grand", "id": 1119, "trainId": 310}, + {"name": "candy, confect", "id": 382, "trainId": 311}, + {"name": "grille door", "id": 1141, "trainId": 312}, + {"name": "pedestal, plinth, footstall", "id": 1805, "trainId": 313}, + {"name": "jersey, t-shirt, tee shirt", "id": 3102, "trainId": 314}, + {"name": "shoe", "id": 2341, "trainId": 315}, + {"name": "gravestone, headstone, tombstone", "id": 1131, "trainId": 316}, + {"name": "shanty", "id": 2316, "trainId": 317}, + {"name": "structure", "id": 2626, "trainId": 318}, + {"name": "rocking chair, rocker", "id": 3104, "trainId": 319}, + {"name": "bird", "id": 198, "trainId": 320}, + {"name": "place mat", "id": 1896, "trainId": 321}, + {"name": "tomb", "id": 2800, "trainId": 322}, + {"name": "big top", "id": 190, "trainId": 323}, + {"name": "gas pump, gasoline pump, petrol pump, island dispenser", "id": 3131, "trainId": 324}, + {"name": "lockers", "id": 1463, "trainId": 325}, 
+ {"name": "cage", "id": 357, "trainId": 326}, + {"name": "finger", "id": 929, "trainId": 327}, + {"name": "bleachers", "id": 209, "trainId": 328}, + {"name": "ferris wheel", "id": 912, "trainId": 329}, + {"name": "hairdresser chair", "id": 1164, "trainId": 330}, + {"name": "mat", "id": 1509, "trainId": 331}, + {"name": "stands", "id": 2539, "trainId": 332}, + {"name": "aquarium, fish tank, marine museum", "id": 3116, "trainId": 333}, + {"name": "streetcar, tram, tramcar, trolley, trolley car", "id": 2615, "trainId": 334}, + {"name": "napkin, table napkin, serviette", "id": 1644, "trainId": 335}, + {"name": "dummy", "id": 818, "trainId": 336}, + {"name": "booklet, brochure, folder, leaflet, pamphlet", "id": 242, "trainId": 337}, + {"name": "sand trap", "id": 2217, "trainId": 338}, + {"name": "shop, store", "id": 2347, "trainId": 339}, + {"name": "table cloth", "id": 2686, "trainId": 340}, + {"name": "service station", "id": 2300, "trainId": 341}, + {"name": "coffin", "id": 572, "trainId": 342}, + {"name": "drawer", "id": 789, "trainId": 343}, + {"name": "cages", "id": 358, "trainId": 344}, + {"name": "slot machine, coin machine", "id": 2443, "trainId": 345}, + {"name": "balcony", "id": 101, "trainId": 346}, + {"name": "volleyball court", "id": 2969, "trainId": 347}, + {"name": "table tennis", "id": 2692, "trainId": 348}, + {"name": "control table", "id": 606, "trainId": 349}, + {"name": "shirt", "id": 2339, "trainId": 350}, + {"name": "merchandise, ware, product", "id": 1533, "trainId": 351}, + {"name": "railway", "id": 2060, "trainId": 352}, + {"name": "parterre", "id": 1782, "trainId": 353}, + {"name": "chimney", "id": 495, "trainId": 354}, + {"name": "can, tin, tin can", "id": 371, "trainId": 355}, + {"name": "tanks", "id": 2707, "trainId": 356}, + {"name": "fabric, cloth, material, textile", "id": 889, "trainId": 357}, + {"name": "alga, algae", "id": 3156, "trainId": 358}, + {"name": "system", "id": 2683, "trainId": 359}, + {"name": "map", "id": 1499, "trainId": 360}, + {"name": "greenhouse", "id": 1135, "trainId": 361}, + {"name": "mug", "id": 1619, "trainId": 362}, + {"name": "barbecue", "id": 125, "trainId": 363}, + {"name": "trailer", "id": 2838, "trainId": 364}, + {"name": "toilet tissue, toilet paper, bathroom tissue", "id": 2792, "trainId": 365}, + {"name": "organ", "id": 1695, "trainId": 366}, + {"name": "dishrag, dishcloth", "id": 746, "trainId": 367}, + {"name": "island", "id": 1343, "trainId": 368}, + {"name": "keyboard", "id": 1370, "trainId": 369}, + {"name": "trench", "id": 2858, "trainId": 370}, + {"name": "basket, basketball hoop, hoop", "id": 145, "trainId": 371}, + {"name": "steering wheel, wheel", "id": 2565, "trainId": 372}, + {"name": "pitcher, ewer", "id": 1892, "trainId": 373}, + {"name": "goal", "id": 1103, "trainId": 374}, + {"name": "bread, breadstuff, staff of life", "id": 286, "trainId": 375}, + {"name": "beds", "id": 170, "trainId": 376}, + {"name": "wood", "id": 3073, "trainId": 377}, + {"name": "file cabinet", "id": 922, "trainId": 378}, + {"name": "newspaper, paper", "id": 1655, "trainId": 379}, + {"name": "motorboat", "id": 1602, "trainId": 380}, + {"name": "rope", "id": 2160, "trainId": 381}, + {"name": "guitar", "id": 1151, "trainId": 382}, + {"name": "rubble", "id": 2176, "trainId": 383}, + {"name": "scarf", "id": 2239, "trainId": 384}, + {"name": "barrels", "id": 132, "trainId": 385}, + {"name": "cap", "id": 394, "trainId": 386}, + {"name": "leaves", "id": 1424, "trainId": 387}, + {"name": "control tower", "id": 607, "trainId": 388}, + {"name": 
"dashboard", "id": 700, "trainId": 389}, + {"name": "bandstand", "id": 116, "trainId": 390}, + {"name": "lectern", "id": 1425, "trainId": 391}, + {"name": "switch, electric switch, electrical switch", "id": 2676, "trainId": 392}, + {"name": "baseboard, mopboard, skirting board", "id": 141, "trainId": 393}, + {"name": "shower room", "id": 2360, "trainId": 394}, + {"name": "smoke", "id": 2449, "trainId": 395}, + {"name": "faucet, spigot", "id": 897, "trainId": 396}, + {"name": "bulldozer", "id": 317, "trainId": 397}, + {"name": "saucepan", "id": 2228, "trainId": 398}, + {"name": "shops", "id": 2351, "trainId": 399}, + {"name": "meter", "id": 1543, "trainId": 400}, + {"name": "crevasse", "id": 656, "trainId": 401}, + {"name": "gear", "id": 1088, "trainId": 402}, + {"name": "candelabrum, candelabra", "id": 373, "trainId": 403}, + {"name": "sofa bed", "id": 2472, "trainId": 404}, + {"name": "tunnel", "id": 2892, "trainId": 405}, + {"name": "pallet", "id": 1740, "trainId": 406}, + {"name": "wire, conducting wire", "id": 3067, "trainId": 407}, + {"name": "kettle, boiler", "id": 1367, "trainId": 408}, + {"name": "bidet", "id": 188, "trainId": 409}, + { + "name": "baby buggy, baby carriage, carriage, perambulator, pram, stroller, go-cart, pushchair, pusher", + "id": 79, + "trainId": 410, + }, + {"name": "music stand", "id": 1633, "trainId": 411}, + {"name": "pipe, tube", "id": 1885, "trainId": 412}, + {"name": "cup", "id": 677, "trainId": 413}, + {"name": "parking meter", "id": 1779, "trainId": 414}, + {"name": "ice hockey rink", "id": 1297, "trainId": 415}, + {"name": "shelter", "id": 2334, "trainId": 416}, + {"name": "weeds", "id": 3027, "trainId": 417}, + {"name": "temple", "id": 2735, "trainId": 418}, + {"name": "patty, cake", "id": 1791, "trainId": 419}, + {"name": "ski slope", "id": 2405, "trainId": 420}, + {"name": "panel", "id": 1748, "trainId": 421}, + {"name": "wallet", "id": 2983, "trainId": 422}, + {"name": "wheel", "id": 3035, "trainId": 423}, + {"name": "towel rack, towel horse", "id": 2824, "trainId": 424}, + {"name": "roundabout", "id": 2168, "trainId": 425}, + {"name": "canister, cannister, tin", "id": 385, "trainId": 426}, + {"name": "rod", "id": 2148, "trainId": 427}, + {"name": "soap dispenser", "id": 2465, "trainId": 428}, + {"name": "bell", "id": 175, "trainId": 429}, + {"name": "canvas", "id": 390, "trainId": 430}, + {"name": "box office, ticket office, ticket booth", "id": 268, "trainId": 431}, + {"name": "teacup", "id": 2722, "trainId": 432}, + {"name": "trellis", "id": 2857, "trainId": 433}, + {"name": "workbench", "id": 3088, "trainId": 434}, + {"name": "valley, vale", "id": 2926, "trainId": 435}, + {"name": "toaster", "id": 2782, "trainId": 436}, + {"name": "knife", "id": 1378, "trainId": 437}, + {"name": "podium", "id": 1934, "trainId": 438}, + {"name": "ramp", "id": 2072, "trainId": 439}, + {"name": "tumble dryer", "id": 2889, "trainId": 440}, + {"name": "fireplug, fire hydrant, plug", "id": 944, "trainId": 441}, + {"name": "gym shoe, sneaker, tennis shoe", "id": 1158, "trainId": 442}, + {"name": "lab bench", "id": 1383, "trainId": 443}, + {"name": "equipment", "id": 867, "trainId": 444}, + {"name": "rocky formation", "id": 2145, "trainId": 445}, + {"name": "plastic", "id": 1915, "trainId": 446}, + {"name": "calendar", "id": 361, "trainId": 447}, + {"name": "caravan", "id": 402, "trainId": 448}, + {"name": "check-in-desk", "id": 482, "trainId": 449}, + {"name": "ticket counter", "id": 2761, "trainId": 450}, + {"name": "brush", "id": 300, "trainId": 451}, + {"name": 
"mill", "id": 1554, "trainId": 452}, + {"name": "covered bridge", "id": 636, "trainId": 453}, + {"name": "bowling alley", "id": 260, "trainId": 454}, + {"name": "hanger", "id": 1186, "trainId": 455}, + {"name": "excavator", "id": 871, "trainId": 456}, + {"name": "trestle", "id": 2859, "trainId": 457}, + {"name": "revolving door", "id": 2103, "trainId": 458}, + {"name": "blast furnace", "id": 208, "trainId": 459}, + {"name": "scale, weighing machine", "id": 2236, "trainId": 460}, + {"name": "projector", "id": 2012, "trainId": 461}, + {"name": "soap", "id": 2462, "trainId": 462}, + {"name": "locker", "id": 1462, "trainId": 463}, + {"name": "tractor", "id": 2832, "trainId": 464}, + {"name": "stretcher", "id": 2617, "trainId": 465}, + {"name": "frame", "id": 1024, "trainId": 466}, + {"name": "grating", "id": 1129, "trainId": 467}, + {"name": "alembic", "id": 18, "trainId": 468}, + {"name": "candle, taper, wax light", "id": 376, "trainId": 469}, + {"name": "barrier", "id": 134, "trainId": 470}, + {"name": "cardboard", "id": 407, "trainId": 471}, + {"name": "cave", "id": 434, "trainId": 472}, + {"name": "puddle", "id": 2017, "trainId": 473}, + {"name": "tarp", "id": 2717, "trainId": 474}, + {"name": "price tag", "id": 2005, "trainId": 475}, + {"name": "watchtower", "id": 2993, "trainId": 476}, + {"name": "meters", "id": 1545, "trainId": 477}, + { + "name": "light bulb, lightbulb, bulb, incandescent lamp, electric light, electric-light bulb", + "id": 1445, + "trainId": 478, + }, + {"name": "tracks", "id": 2831, "trainId": 479}, + {"name": "hair dryer", "id": 1161, "trainId": 480}, + {"name": "skirt", "id": 2411, "trainId": 481}, + {"name": "viaduct", "id": 2949, "trainId": 482}, + {"name": "paper towel", "id": 1769, "trainId": 483}, + {"name": "coat", "id": 552, "trainId": 484}, + {"name": "sheet", "id": 2327, "trainId": 485}, + {"name": "fire extinguisher, extinguisher, asphyxiator", "id": 939, "trainId": 486}, + {"name": "water wheel", "id": 3013, "trainId": 487}, + {"name": "pottery, clayware", "id": 1986, "trainId": 488}, + {"name": "magazine rack", "id": 1486, "trainId": 489}, + {"name": "teapot", "id": 2723, "trainId": 490}, + {"name": "microphone, mike", "id": 1549, "trainId": 491}, + {"name": "support", "id": 2649, "trainId": 492}, + {"name": "forklift", "id": 1020, "trainId": 493}, + {"name": "canyon", "id": 392, "trainId": 494}, + {"name": "cash register, register", "id": 422, "trainId": 495}, + {"name": "leaf, leafage, foliage", "id": 1419, "trainId": 496}, + {"name": "remote control, remote", "id": 2099, "trainId": 497}, + {"name": "soap dish", "id": 2464, "trainId": 498}, + {"name": "windshield, windscreen", "id": 3058, "trainId": 499}, + {"name": "cat", "id": 430, "trainId": 500}, + {"name": "cue, cue stick, pool cue, pool stick", "id": 675, "trainId": 501}, + {"name": "vent, venthole, vent-hole, blowhole", "id": 2941, "trainId": 502}, + {"name": "videos", "id": 2955, "trainId": 503}, + {"name": "shovel", "id": 2355, "trainId": 504}, + {"name": "eaves", "id": 840, "trainId": 505}, + {"name": "antenna, aerial, transmitting aerial", "id": 32, "trainId": 506}, + {"name": "shipyard", "id": 2338, "trainId": 507}, + {"name": "hen, biddy", "id": 1232, "trainId": 508}, + {"name": "traffic cone", "id": 2834, "trainId": 509}, + {"name": "washing machines", "id": 2991, "trainId": 510}, + {"name": "truck crane", "id": 2879, "trainId": 511}, + {"name": "cds", "id": 444, "trainId": 512}, + {"name": "niche", "id": 1657, "trainId": 513}, + {"name": "scoreboard", "id": 2246, "trainId": 514}, + 
{"name": "briefcase", "id": 296, "trainId": 515}, + {"name": "boot", "id": 245, "trainId": 516}, + {"name": "sweater, jumper", "id": 2661, "trainId": 517}, + {"name": "hay", "id": 1202, "trainId": 518}, + {"name": "pack", "id": 1714, "trainId": 519}, + {"name": "bottle rack", "id": 251, "trainId": 520}, + {"name": "glacier", "id": 1095, "trainId": 521}, + {"name": "pergola", "id": 1828, "trainId": 522}, + {"name": "building materials", "id": 311, "trainId": 523}, + {"name": "television camera", "id": 2732, "trainId": 524}, + {"name": "first floor", "id": 947, "trainId": 525}, + {"name": "rifle", "id": 2115, "trainId": 526}, + {"name": "tennis table", "id": 2738, "trainId": 527}, + {"name": "stadium", "id": 2525, "trainId": 528}, + {"name": "safety belt", "id": 2194, "trainId": 529}, + {"name": "cover", "id": 634, "trainId": 530}, + {"name": "dish rack", "id": 740, "trainId": 531}, + {"name": "synthesizer", "id": 2682, "trainId": 532}, + {"name": "pumpkin", "id": 2020, "trainId": 533}, + {"name": "gutter", "id": 1156, "trainId": 534}, + {"name": "fruit stand", "id": 1036, "trainId": 535}, + {"name": "ice floe, floe", "id": 1295, "trainId": 536}, + {"name": "handle, grip, handgrip, hold", "id": 1181, "trainId": 537}, + {"name": "wheelchair", "id": 3037, "trainId": 538}, + {"name": "mousepad, mouse mat", "id": 1614, "trainId": 539}, + {"name": "diploma", "id": 736, "trainId": 540}, + {"name": "fairground ride", "id": 893, "trainId": 541}, + {"name": "radio", "id": 2047, "trainId": 542}, + {"name": "hotplate", "id": 1274, "trainId": 543}, + {"name": "junk", "id": 1361, "trainId": 544}, + {"name": "wheelbarrow", "id": 3036, "trainId": 545}, + {"name": "stream", "id": 2606, "trainId": 546}, + {"name": "toll plaza", "id": 2797, "trainId": 547}, + {"name": "punching bag", "id": 2022, "trainId": 548}, + {"name": "trough", "id": 2876, "trainId": 549}, + {"name": "throne", "id": 2758, "trainId": 550}, + {"name": "chair desk", "id": 472, "trainId": 551}, + {"name": "weighbridge", "id": 3028, "trainId": 552}, + {"name": "extractor fan", "id": 882, "trainId": 553}, + {"name": "hanging clothes", "id": 1189, "trainId": 554}, + {"name": "dish, dish aerial, dish antenna, saucer", "id": 743, "trainId": 555}, + {"name": "alarm clock, alarm", "id": 3122, "trainId": 556}, + {"name": "ski lift", "id": 2401, "trainId": 557}, + {"name": "chain", "id": 468, "trainId": 558}, + {"name": "garage", "id": 1061, "trainId": 559}, + {"name": "mechanical shovel", "id": 1523, "trainId": 560}, + {"name": "wine rack", "id": 3059, "trainId": 561}, + {"name": "tramway", "id": 2843, "trainId": 562}, + {"name": "treadmill", "id": 2853, "trainId": 563}, + {"name": "menu", "id": 1529, "trainId": 564}, + {"name": "block", "id": 214, "trainId": 565}, + {"name": "well", "id": 3032, "trainId": 566}, + {"name": "witness stand", "id": 3071, "trainId": 567}, + {"name": "branch", "id": 277, "trainId": 568}, + {"name": "duck", "id": 813, "trainId": 569}, + {"name": "casserole", "id": 426, "trainId": 570}, + {"name": "frying pan", "id": 1039, "trainId": 571}, + {"name": "desk organizer", "id": 727, "trainId": 572}, + {"name": "mast", "id": 1508, "trainId": 573}, + {"name": "spectacles, specs, eyeglasses, glasses", "id": 2490, "trainId": 574}, + {"name": "service elevator", "id": 2299, "trainId": 575}, + {"name": "dollhouse", "id": 768, "trainId": 576}, + {"name": "hammock", "id": 1172, "trainId": 577}, + {"name": "clothes hanging", "id": 537, "trainId": 578}, + {"name": "photocopier", "id": 1847, "trainId": 579}, + {"name": "notepad", "id": 
1664, "trainId": 580}, + {"name": "golf cart", "id": 1110, "trainId": 581}, + {"name": "footpath", "id": 1014, "trainId": 582}, + {"name": "cross", "id": 662, "trainId": 583}, + {"name": "baptismal font", "id": 121, "trainId": 584}, + {"name": "boiler", "id": 227, "trainId": 585}, + {"name": "skip", "id": 2410, "trainId": 586}, + {"name": "rotisserie", "id": 2165, "trainId": 587}, + {"name": "tables", "id": 2696, "trainId": 588}, + {"name": "water mill", "id": 3005, "trainId": 589}, + {"name": "helmet", "id": 1231, "trainId": 590}, + {"name": "cover curtain", "id": 635, "trainId": 591}, + {"name": "brick", "id": 292, "trainId": 592}, + {"name": "table runner", "id": 2690, "trainId": 593}, + {"name": "ashtray", "id": 65, "trainId": 594}, + {"name": "street box", "id": 2607, "trainId": 595}, + {"name": "stick", "id": 2574, "trainId": 596}, + {"name": "hangers", "id": 1188, "trainId": 597}, + {"name": "cells", "id": 456, "trainId": 598}, + {"name": "urinal", "id": 2913, "trainId": 599}, + {"name": "centerpiece", "id": 459, "trainId": 600}, + {"name": "portable fridge", "id": 1955, "trainId": 601}, + {"name": "dvds", "id": 827, "trainId": 602}, + {"name": "golf club", "id": 1111, "trainId": 603}, + {"name": "skirting board", "id": 2412, "trainId": 604}, + {"name": "water cooler", "id": 2997, "trainId": 605}, + {"name": "clipboard", "id": 528, "trainId": 606}, + {"name": "camera, photographic camera", "id": 366, "trainId": 607}, + {"name": "pigeonhole", "id": 1863, "trainId": 608}, + {"name": "chips", "id": 500, "trainId": 609}, + {"name": "food processor", "id": 1001, "trainId": 610}, + {"name": "post box", "id": 1958, "trainId": 611}, + {"name": "lid", "id": 1441, "trainId": 612}, + {"name": "drum", "id": 809, "trainId": 613}, + {"name": "blender", "id": 210, "trainId": 614}, + {"name": "cave entrance", "id": 435, "trainId": 615}, + {"name": "dental chair", "id": 718, "trainId": 616}, + {"name": "obelisk", "id": 1674, "trainId": 617}, + {"name": "canoe", "id": 388, "trainId": 618}, + {"name": "mobile", "id": 1572, "trainId": 619}, + {"name": "monitors", "id": 1584, "trainId": 620}, + {"name": "pool ball", "id": 1944, "trainId": 621}, + {"name": "cue rack", "id": 674, "trainId": 622}, + {"name": "baggage carts", "id": 99, "trainId": 623}, + {"name": "shore", "id": 2352, "trainId": 624}, + {"name": "fork", "id": 1019, "trainId": 625}, + {"name": "paper filer", "id": 1763, "trainId": 626}, + {"name": "bicycle rack", "id": 185, "trainId": 627}, + {"name": "coat rack", "id": 554, "trainId": 628}, + {"name": "garland", "id": 1066, "trainId": 629}, + {"name": "sports bag", "id": 2508, "trainId": 630}, + {"name": "fish tank", "id": 951, "trainId": 631}, + {"name": "towel dispenser", "id": 2822, "trainId": 632}, + {"name": "carriage", "id": 415, "trainId": 633}, + {"name": "brochure", "id": 297, "trainId": 634}, + {"name": "plaque", "id": 1914, "trainId": 635}, + {"name": "stringer", "id": 2619, "trainId": 636}, + {"name": "iron", "id": 1338, "trainId": 637}, + {"name": "spoon", "id": 2505, "trainId": 638}, + {"name": "flag pole", "id": 955, "trainId": 639}, + {"name": "toilet brush", "id": 2786, "trainId": 640}, + {"name": "book stand", "id": 238, "trainId": 641}, + {"name": "water faucet, water tap, tap, hydrant", "id": 3000, "trainId": 642}, + {"name": "ticket office", "id": 2763, "trainId": 643}, + {"name": "broom", "id": 299, "trainId": 644}, + {"name": "dvd", "id": 822, "trainId": 645}, + {"name": "ice bucket", "id": 1288, "trainId": 646}, + {"name": "carapace, shell, cuticle, shield", "id": 
3101, "trainId": 647}, + {"name": "tureen", "id": 2894, "trainId": 648}, + {"name": "folders", "id": 992, "trainId": 649}, + {"name": "chess", "id": 489, "trainId": 650}, + {"name": "root", "id": 2157, "trainId": 651}, + {"name": "sewing machine", "id": 2309, "trainId": 652}, + {"name": "model", "id": 1576, "trainId": 653}, + {"name": "pen", "id": 1810, "trainId": 654}, + {"name": "violin", "id": 2964, "trainId": 655}, + {"name": "sweatshirt", "id": 2662, "trainId": 656}, + {"name": "recycling materials", "id": 2087, "trainId": 657}, + {"name": "mitten", "id": 1569, "trainId": 658}, + {"name": "chopping board, cutting board", "id": 503, "trainId": 659}, + {"name": "mask", "id": 1505, "trainId": 660}, + {"name": "log", "id": 1468, "trainId": 661}, + {"name": "mouse, computer mouse", "id": 1613, "trainId": 662}, + {"name": "grill", "id": 1138, "trainId": 663}, + {"name": "hole", "id": 1256, "trainId": 664}, + {"name": "target", "id": 2715, "trainId": 665}, + {"name": "trash bag", "id": 2846, "trainId": 666}, + {"name": "chalk", "id": 477, "trainId": 667}, + {"name": "sticks", "id": 2576, "trainId": 668}, + {"name": "balloon", "id": 108, "trainId": 669}, + {"name": "score", "id": 2245, "trainId": 670}, + {"name": "hair spray", "id": 1162, "trainId": 671}, + {"name": "roll", "id": 2149, "trainId": 672}, + {"name": "runner", "id": 2183, "trainId": 673}, + {"name": "engine", "id": 858, "trainId": 674}, + {"name": "inflatable glove", "id": 1324, "trainId": 675}, + {"name": "games", "id": 1055, "trainId": 676}, + {"name": "pallets", "id": 1741, "trainId": 677}, + {"name": "baskets", "id": 149, "trainId": 678}, + {"name": "coop", "id": 615, "trainId": 679}, + {"name": "dvd player", "id": 825, "trainId": 680}, + {"name": "rocking horse", "id": 2143, "trainId": 681}, + {"name": "buckets", "id": 304, "trainId": 682}, + {"name": "bread rolls", "id": 283, "trainId": 683}, + {"name": "shawl", "id": 2322, "trainId": 684}, + {"name": "watering can", "id": 3017, "trainId": 685}, + {"name": "spotlights", "id": 2510, "trainId": 686}, + {"name": "post-it", "id": 1960, "trainId": 687}, + {"name": "bowls", "id": 265, "trainId": 688}, + {"name": "security camera", "id": 2282, "trainId": 689}, + {"name": "runner cloth", "id": 2184, "trainId": 690}, + {"name": "lock", "id": 1461, "trainId": 691}, + {"name": "alarm, warning device, alarm system", "id": 3113, "trainId": 692}, + {"name": "side", "id": 2372, "trainId": 693}, + {"name": "roulette", "id": 2166, "trainId": 694}, + {"name": "bone", "id": 232, "trainId": 695}, + {"name": "cutlery", "id": 693, "trainId": 696}, + {"name": "pool balls", "id": 1945, "trainId": 697}, + {"name": "wheels", "id": 3039, "trainId": 698}, + {"name": "spice rack", "id": 2494, "trainId": 699}, + {"name": "plant pots", "id": 1908, "trainId": 700}, + {"name": "towel ring", "id": 2827, "trainId": 701}, + {"name": "bread box", "id": 280, "trainId": 702}, + {"name": "video", "id": 2950, "trainId": 703}, + {"name": "funfair", "id": 1044, "trainId": 704}, + {"name": "breads", "id": 288, "trainId": 705}, + {"name": "tripod", "id": 2863, "trainId": 706}, + {"name": "ironing board", "id": 1342, "trainId": 707}, + {"name": "skimmer", "id": 2409, "trainId": 708}, + {"name": "hollow", "id": 1258, "trainId": 709}, + {"name": "scratching post", "id": 2249, "trainId": 710}, + {"name": "tricycle", "id": 2862, "trainId": 711}, + {"name": "file box", "id": 920, "trainId": 712}, + {"name": "mountain pass", "id": 1607, "trainId": 713}, + {"name": "tombstones", "id": 2802, "trainId": 714}, + {"name": 
"cooker", "id": 610, "trainId": 715}, + {"name": "card game, cards", "id": 3129, "trainId": 716}, + {"name": "golf bag", "id": 1108, "trainId": 717}, + {"name": "towel paper", "id": 2823, "trainId": 718}, + {"name": "chaise lounge", "id": 476, "trainId": 719}, + {"name": "sun", "id": 2641, "trainId": 720}, + {"name": "toilet paper holder", "id": 2788, "trainId": 721}, + {"name": "rake", "id": 2070, "trainId": 722}, + {"name": "key", "id": 1368, "trainId": 723}, + {"name": "umbrella stand", "id": 2903, "trainId": 724}, + {"name": "dartboard", "id": 699, "trainId": 725}, + {"name": "transformer", "id": 2844, "trainId": 726}, + {"name": "fireplace utensils", "id": 942, "trainId": 727}, + {"name": "sweatshirts", "id": 2663, "trainId": 728}, + { + "name": "cellular telephone, cellular phone, cellphone, cell, mobile phone", + "id": 457, + "trainId": 729, + }, + {"name": "tallboy", "id": 2701, "trainId": 730}, + {"name": "stapler", "id": 2540, "trainId": 731}, + {"name": "sauna", "id": 2231, "trainId": 732}, + {"name": "test tube", "id": 2746, "trainId": 733}, + {"name": "palette", "id": 1738, "trainId": 734}, + {"name": "shopping carts", "id": 2350, "trainId": 735}, + {"name": "tools", "id": 2808, "trainId": 736}, + {"name": "push button, push, button", "id": 2025, "trainId": 737}, + {"name": "star", "id": 2541, "trainId": 738}, + {"name": "roof rack", "id": 2156, "trainId": 739}, + {"name": "barbed wire", "id": 126, "trainId": 740}, + {"name": "spray", "id": 2512, "trainId": 741}, + {"name": "ear", "id": 831, "trainId": 742}, + {"name": "sponge", "id": 2503, "trainId": 743}, + {"name": "racket", "id": 2039, "trainId": 744}, + {"name": "tins", "id": 2774, "trainId": 745}, + {"name": "eyeglasses", "id": 886, "trainId": 746}, + {"name": "file", "id": 919, "trainId": 747}, + {"name": "scarfs", "id": 2240, "trainId": 748}, + {"name": "sugar bowl", "id": 2636, "trainId": 749}, + {"name": "flip flop", "id": 963, "trainId": 750}, + {"name": "headstones", "id": 1218, "trainId": 751}, + {"name": "laptop bag", "id": 1406, "trainId": 752}, + {"name": "leash", "id": 1420, "trainId": 753}, + {"name": "climbing frame", "id": 526, "trainId": 754}, + {"name": "suit hanger", "id": 2639, "trainId": 755}, + {"name": "floor spotlight", "id": 975, "trainId": 756}, + {"name": "plate rack", "id": 1921, "trainId": 757}, + {"name": "sewer", "id": 2305, "trainId": 758}, + {"name": "hard drive", "id": 1193, "trainId": 759}, + {"name": "sprinkler", "id": 2517, "trainId": 760}, + {"name": "tools box", "id": 2809, "trainId": 761}, + {"name": "necklace", "id": 1647, "trainId": 762}, + {"name": "bulbs", "id": 314, "trainId": 763}, + {"name": "steel industry", "id": 2560, "trainId": 764}, + {"name": "club", "id": 545, "trainId": 765}, + {"name": "jack", "id": 1345, "trainId": 766}, + {"name": "door bars", "id": 775, "trainId": 767}, + { + "name": "control panel, instrument panel, control board, board, panel", + "id": 603, + "trainId": 768, + }, + {"name": "hairbrush", "id": 1163, "trainId": 769}, + {"name": "napkin holder", "id": 1641, "trainId": 770}, + {"name": "office", "id": 1678, "trainId": 771}, + {"name": "smoke detector", "id": 2450, "trainId": 772}, + {"name": "utensils", "id": 2915, "trainId": 773}, + {"name": "apron", "id": 42, "trainId": 774}, + {"name": "scissors", "id": 2242, "trainId": 775}, + {"name": "terminal", "id": 2741, "trainId": 776}, + {"name": "grinder", "id": 1143, "trainId": 777}, + {"name": "entry phone", "id": 862, "trainId": 778}, + {"name": "newspaper stand", "id": 1654, "trainId": 779}, + 
{"name": "pepper shaker", "id": 1826, "trainId": 780}, + {"name": "onions", "id": 1689, "trainId": 781}, + { + "name": "central processing unit, cpu, c p u , central processor, processor, mainframe", + "id": 3124, + "trainId": 782, + }, + {"name": "tape", "id": 2710, "trainId": 783}, + {"name": "bat", "id": 152, "trainId": 784}, + {"name": "coaster", "id": 549, "trainId": 785}, + {"name": "calculator", "id": 360, "trainId": 786}, + {"name": "potatoes", "id": 1982, "trainId": 787}, + {"name": "luggage rack", "id": 1478, "trainId": 788}, + {"name": "salt", "id": 2203, "trainId": 789}, + {"name": "street number", "id": 2612, "trainId": 790}, + {"name": "viewpoint", "id": 2956, "trainId": 791}, + {"name": "sword", "id": 2681, "trainId": 792}, + {"name": "cd", "id": 437, "trainId": 793}, + {"name": "rowing machine", "id": 2171, "trainId": 794}, + {"name": "plug", "id": 1933, "trainId": 795}, + {"name": "andiron, firedog, dog, dog-iron", "id": 3110, "trainId": 796}, + {"name": "pepper", "id": 1824, "trainId": 797}, + {"name": "tongs", "id": 2803, "trainId": 798}, + {"name": "bonfire", "id": 234, "trainId": 799}, + {"name": "dog dish", "id": 764, "trainId": 800}, + {"name": "belt", "id": 177, "trainId": 801}, + {"name": "dumbbells", "id": 817, "trainId": 802}, + {"name": "videocassette recorder, vcr", "id": 3145, "trainId": 803}, + {"name": "hook", "id": 1262, "trainId": 804}, + {"name": "envelopes", "id": 864, "trainId": 805}, + {"name": "shower faucet", "id": 2359, "trainId": 806}, + {"name": "watch", "id": 2992, "trainId": 807}, + {"name": "padlock", "id": 1725, "trainId": 808}, + {"name": "swimming pool ladder", "id": 2667, "trainId": 809}, + {"name": "spanners", "id": 2484, "trainId": 810}, + {"name": "gravy boat", "id": 1133, "trainId": 811}, + {"name": "notice board", "id": 1667, "trainId": 812}, + {"name": "trash bags", "id": 2847, "trainId": 813}, + {"name": "fire alarm", "id": 932, "trainId": 814}, + {"name": "ladle", "id": 1392, "trainId": 815}, + {"name": "stethoscope", "id": 2573, "trainId": 816}, + {"name": "rocket", "id": 2140, "trainId": 817}, + {"name": "funnel", "id": 1046, "trainId": 818}, + {"name": "bowling pins", "id": 264, "trainId": 819}, + {"name": "valve", "id": 2927, "trainId": 820}, + {"name": "thermometer", "id": 2752, "trainId": 821}, + {"name": "cups", "id": 679, "trainId": 822}, + {"name": "spice jar", "id": 2493, "trainId": 823}, + {"name": "night light", "id": 1658, "trainId": 824}, + {"name": "soaps", "id": 2466, "trainId": 825}, + {"name": "games table", "id": 1057, "trainId": 826}, + {"name": "slotted spoon", "id": 2444, "trainId": 827}, + {"name": "reel", "id": 2093, "trainId": 828}, + {"name": "scourer", "id": 2248, "trainId": 829}, + {"name": "sleeping robe", "id": 2432, "trainId": 830}, + {"name": "desk mat", "id": 726, "trainId": 831}, + {"name": "dumbbell", "id": 816, "trainId": 832}, + {"name": "hammer", "id": 1171, "trainId": 833}, + {"name": "tie", "id": 2766, "trainId": 834}, + {"name": "typewriter", "id": 2900, "trainId": 835}, + {"name": "shaker", "id": 2313, "trainId": 836}, + {"name": "cheese dish", "id": 488, "trainId": 837}, + {"name": "sea star", "id": 2265, "trainId": 838}, + {"name": "racquet", "id": 2043, "trainId": 839}, + {"name": "butane gas cylinder", "id": 332, "trainId": 840}, + {"name": "paper weight", "id": 1771, "trainId": 841}, + {"name": "shaving brush", "id": 2320, "trainId": 842}, + {"name": "sunglasses", "id": 2646, "trainId": 843}, + {"name": "gear shift", "id": 1089, "trainId": 844}, + {"name": "towel rail", "id": 2826, 
"trainId": 845}, + {"name": "adding machine, totalizer, totaliser", "id": 3148, "trainId": 846}, +] + +PASCAL_VOC_21_CATEGORIES = [ + {"color": [0, 0, 0], "id": 0, "isthing": 1, "name": "background"}, + {"color": [128, 0, 0], "id": 1, "isthing": 1, "name": "aeroplane"}, + {"color": [0, 128, 0], "id": 2, "isthing": 1, "name": "bicycle"}, + {"color": [128, 128, 0], "id": 3, "isthing": 1, "name": "bird"}, + {"color": [0, 0, 128], "id": 4, "isthing": 1, "name": "boat"}, + {"color": [128, 0, 128], "id": 5, "isthing": 1, "name": "bottle"}, + {"color": [0, 128, 128], "id": 6, "isthing": 1, "name": "bus"}, + {"color": [128, 128, 128], "id": 7, "isthing": 1, "name": "car"}, + {"color": [64, 0, 0], "id": 8, "isthing": 1, "name": "cat"}, + {"color": [192, 0, 0], "id": 9, "isthing": 1, "name": "chair"}, + {"color": [64, 128, 0], "id": 10, "isthing": 1, "name": "cow"}, + {"color": [192, 128, 0], "id": 11, "isthing": 1, "name": "diningtable"}, + {"color": [64, 0, 128], "id": 12, "isthing": 1, "name": "dog"}, + {"color": [192, 0, 128], "id": 13, "isthing": 1, "name": "horse"}, + {"color": [64, 128, 128], "id": 14, "isthing": 1, "name": "motorbike"}, + {"color": [192, 128, 128], "id": 15, "isthing": 1, "name": "person"}, + {"color": [0, 64, 0], "id": 16, "isthing": 1, "name": "pottedplant"}, + {"color": [128, 64, 0], "id": 17, "isthing": 1, "name": "sheep"}, + {"color": [0, 192, 0], "id": 18, "isthing": 1, "name": "sofa"}, + {"color": [128, 192, 0], "id": 19, "isthing": 1, "name": "train"}, + {"color": [0, 64, 128], "id": 20, "isthing": 1, "name": "tvmonitor"}, +] + +PASCAL_CTX_459_CATEGORIES = [ + {"color": [120, 120, 120], "id": 0, "isthing": 0, "name": "accordion"}, + {"color": [180, 120, 120], "id": 1, "isthing": 0, "name": "aeroplane"}, + {"color": [6, 230, 230], "id": 2, "isthing": 0, "name": "air conditioner"}, + {"color": [80, 50, 50], "id": 3, "isthing": 0, "name": "antenna"}, + {"color": [4, 200, 3], "id": 4, "isthing": 0, "name": "artillery"}, + {"color": [120, 120, 80], "id": 5, "isthing": 0, "name": "ashtray"}, + {"color": [140, 140, 140], "id": 6, "isthing": 0, "name": "atrium"}, + {"color": [204, 5, 255], "id": 7, "isthing": 0, "name": "baby carriage"}, + {"color": [230, 230, 230], "id": 8, "isthing": 0, "name": "bag"}, + {"color": [4, 250, 7], "id": 9, "isthing": 0, "name": "ball"}, + {"color": [224, 5, 255], "id": 10, "isthing": 0, "name": "balloon"}, + {"color": [235, 255, 7], "id": 11, "isthing": 0, "name": "bamboo weaving"}, + {"color": [150, 5, 61], "id": 12, "isthing": 0, "name": "barrel"}, + {"color": [120, 120, 70], "id": 13, "isthing": 0, "name": "baseball bat"}, + {"color": [8, 255, 51], "id": 14, "isthing": 0, "name": "basket"}, + {"color": [255, 6, 82], "id": 15, "isthing": 0, "name": "basketball backboard"}, + {"color": [143, 255, 140], "id": 16, "isthing": 0, "name": "bathtub"}, + {"color": [204, 255, 4], "id": 17, "isthing": 0, "name": "bed"}, + {"color": [255, 51, 7], "id": 18, "isthing": 0, "name": "bedclothes"}, + {"color": [204, 70, 3], "id": 19, "isthing": 0, "name": "beer"}, + {"color": [0, 102, 200], "id": 20, "isthing": 0, "name": "bell"}, + {"color": [61, 230, 250], "id": 21, "isthing": 0, "name": "bench"}, + {"color": [255, 6, 51], "id": 22, "isthing": 0, "name": "bicycle"}, + {"color": [11, 102, 255], "id": 23, "isthing": 0, "name": "binoculars"}, + {"color": [255, 7, 71], "id": 24, "isthing": 0, "name": "bird"}, + {"color": [255, 9, 224], "id": 25, "isthing": 0, "name": "bird cage"}, + {"color": [9, 7, 230], "id": 26, "isthing": 0, "name": "bird feeder"}, + 
{"color": [220, 220, 220], "id": 27, "isthing": 0, "name": "bird nest"}, + {"color": [255, 9, 92], "id": 28, "isthing": 0, "name": "blackboard"}, + {"color": [112, 9, 255], "id": 29, "isthing": 0, "name": "board"}, + {"color": [8, 255, 214], "id": 30, "isthing": 0, "name": "boat"}, + {"color": [7, 255, 224], "id": 31, "isthing": 0, "name": "bone"}, + {"color": [255, 184, 6], "id": 32, "isthing": 0, "name": "book"}, + {"color": [10, 255, 71], "id": 33, "isthing": 0, "name": "bottle"}, + {"color": [255, 41, 10], "id": 34, "isthing": 0, "name": "bottle opener"}, + {"color": [7, 255, 255], "id": 35, "isthing": 0, "name": "bowl"}, + {"color": [224, 255, 8], "id": 36, "isthing": 0, "name": "box"}, + {"color": [102, 8, 255], "id": 37, "isthing": 0, "name": "bracelet"}, + {"color": [255, 61, 6], "id": 38, "isthing": 0, "name": "brick"}, + {"color": [255, 194, 7], "id": 39, "isthing": 0, "name": "bridge"}, + {"color": [255, 122, 8], "id": 40, "isthing": 0, "name": "broom"}, + {"color": [0, 255, 20], "id": 41, "isthing": 0, "name": "brush"}, + {"color": [255, 8, 41], "id": 42, "isthing": 0, "name": "bucket"}, + {"color": [255, 5, 153], "id": 43, "isthing": 0, "name": "building"}, + {"color": [6, 51, 255], "id": 44, "isthing": 0, "name": "bus"}, + {"color": [235, 12, 255], "id": 45, "isthing": 0, "name": "cabinet"}, + {"color": [160, 150, 20], "id": 46, "isthing": 0, "name": "cabinet door"}, + {"color": [0, 163, 255], "id": 47, "isthing": 0, "name": "cage"}, + {"color": [140, 140, 140], "id": 48, "isthing": 0, "name": "cake"}, + {"color": [250, 10, 15], "id": 49, "isthing": 0, "name": "calculator"}, + {"color": [20, 255, 0], "id": 50, "isthing": 0, "name": "calendar"}, + {"color": [31, 255, 0], "id": 51, "isthing": 0, "name": "camel"}, + {"color": [255, 31, 0], "id": 52, "isthing": 0, "name": "camera"}, + {"color": [255, 224, 0], "id": 53, "isthing": 0, "name": "camera lens"}, + {"color": [153, 255, 0], "id": 54, "isthing": 0, "name": "can"}, + {"color": [0, 0, 255], "id": 55, "isthing": 0, "name": "candle"}, + {"color": [255, 71, 0], "id": 56, "isthing": 0, "name": "candle holder"}, + {"color": [0, 235, 255], "id": 57, "isthing": 0, "name": "cap"}, + {"color": [0, 173, 255], "id": 58, "isthing": 0, "name": "car"}, + {"color": [31, 0, 255], "id": 59, "isthing": 0, "name": "card"}, + {"color": [120, 120, 120], "id": 60, "isthing": 0, "name": "cart"}, + {"color": [180, 120, 120], "id": 61, "isthing": 0, "name": "case"}, + {"color": [6, 230, 230], "id": 62, "isthing": 0, "name": "casette recorder"}, + {"color": [80, 50, 50], "id": 63, "isthing": 0, "name": "cash register"}, + {"color": [4, 200, 3], "id": 64, "isthing": 0, "name": "cat"}, + {"color": [120, 120, 80], "id": 65, "isthing": 0, "name": "cd"}, + {"color": [140, 140, 140], "id": 66, "isthing": 0, "name": "cd player"}, + {"color": [204, 5, 255], "id": 67, "isthing": 0, "name": "ceiling"}, + {"color": [230, 230, 230], "id": 68, "isthing": 0, "name": "cell phone"}, + {"color": [4, 250, 7], "id": 69, "isthing": 0, "name": "cello"}, + {"color": [224, 5, 255], "id": 70, "isthing": 0, "name": "chain"}, + {"color": [235, 255, 7], "id": 71, "isthing": 0, "name": "chair"}, + {"color": [150, 5, 61], "id": 72, "isthing": 0, "name": "chessboard"}, + {"color": [120, 120, 70], "id": 73, "isthing": 0, "name": "chicken"}, + {"color": [8, 255, 51], "id": 74, "isthing": 0, "name": "chopstick"}, + {"color": [255, 6, 82], "id": 75, "isthing": 0, "name": "clip"}, + {"color": [143, 255, 140], "id": 76, "isthing": 0, "name": "clippers"}, + {"color": [204, 255, 4], 
"id": 77, "isthing": 0, "name": "clock"}, + {"color": [255, 51, 7], "id": 78, "isthing": 0, "name": "closet"}, + {"color": [204, 70, 3], "id": 79, "isthing": 0, "name": "cloth"}, + {"color": [0, 102, 200], "id": 80, "isthing": 0, "name": "clothes tree"}, + {"color": [61, 230, 250], "id": 81, "isthing": 0, "name": "coffee"}, + {"color": [255, 6, 51], "id": 82, "isthing": 0, "name": "coffee machine"}, + {"color": [11, 102, 255], "id": 83, "isthing": 0, "name": "comb"}, + {"color": [255, 7, 71], "id": 84, "isthing": 0, "name": "computer"}, + {"color": [255, 9, 224], "id": 85, "isthing": 0, "name": "concrete"}, + {"color": [9, 7, 230], "id": 86, "isthing": 0, "name": "cone"}, + {"color": [220, 220, 220], "id": 87, "isthing": 0, "name": "container"}, + {"color": [255, 9, 92], "id": 88, "isthing": 0, "name": "control booth"}, + {"color": [112, 9, 255], "id": 89, "isthing": 0, "name": "controller"}, + {"color": [8, 255, 214], "id": 90, "isthing": 0, "name": "cooker"}, + {"color": [7, 255, 224], "id": 91, "isthing": 0, "name": "copying machine"}, + {"color": [255, 184, 6], "id": 92, "isthing": 0, "name": "coral"}, + {"color": [10, 255, 71], "id": 93, "isthing": 0, "name": "cork"}, + {"color": [255, 41, 10], "id": 94, "isthing": 0, "name": "corkscrew"}, + {"color": [7, 255, 255], "id": 95, "isthing": 0, "name": "counter"}, + {"color": [224, 255, 8], "id": 96, "isthing": 0, "name": "court"}, + {"color": [102, 8, 255], "id": 97, "isthing": 0, "name": "cow"}, + {"color": [255, 61, 6], "id": 98, "isthing": 0, "name": "crabstick"}, + {"color": [255, 194, 7], "id": 99, "isthing": 0, "name": "crane"}, + {"color": [255, 122, 8], "id": 100, "isthing": 0, "name": "crate"}, + {"color": [0, 255, 20], "id": 101, "isthing": 0, "name": "cross"}, + {"color": [255, 8, 41], "id": 102, "isthing": 0, "name": "crutch"}, + {"color": [255, 5, 153], "id": 103, "isthing": 0, "name": "cup"}, + {"color": [6, 51, 255], "id": 104, "isthing": 0, "name": "curtain"}, + {"color": [235, 12, 255], "id": 105, "isthing": 0, "name": "cushion"}, + {"color": [160, 150, 20], "id": 106, "isthing": 0, "name": "cutting board"}, + {"color": [0, 163, 255], "id": 107, "isthing": 0, "name": "dais"}, + {"color": [140, 140, 140], "id": 108, "isthing": 0, "name": "disc"}, + {"color": [250, 10, 15], "id": 109, "isthing": 0, "name": "disc case"}, + {"color": [20, 255, 0], "id": 110, "isthing": 0, "name": "dishwasher"}, + {"color": [31, 255, 0], "id": 111, "isthing": 0, "name": "dock"}, + {"color": [255, 31, 0], "id": 112, "isthing": 0, "name": "dog"}, + {"color": [255, 224, 0], "id": 113, "isthing": 0, "name": "dolphin"}, + {"color": [153, 255, 0], "id": 114, "isthing": 0, "name": "door"}, + {"color": [0, 0, 255], "id": 115, "isthing": 0, "name": "drainer"}, + {"color": [255, 71, 0], "id": 116, "isthing": 0, "name": "dray"}, + {"color": [0, 235, 255], "id": 117, "isthing": 0, "name": "drink dispenser"}, + {"color": [0, 173, 255], "id": 118, "isthing": 0, "name": "drinking machine"}, + {"color": [31, 0, 255], "id": 119, "isthing": 0, "name": "drop"}, + {"color": [120, 120, 120], "id": 120, "isthing": 0, "name": "drug"}, + {"color": [180, 120, 120], "id": 121, "isthing": 0, "name": "drum"}, + {"color": [6, 230, 230], "id": 122, "isthing": 0, "name": "drum kit"}, + {"color": [80, 50, 50], "id": 123, "isthing": 0, "name": "duck"}, + {"color": [4, 200, 3], "id": 124, "isthing": 0, "name": "dumbbell"}, + {"color": [120, 120, 80], "id": 125, "isthing": 0, "name": "earphone"}, + {"color": [140, 140, 140], "id": 126, "isthing": 0, "name": "earrings"}, + 
{"color": [204, 5, 255], "id": 127, "isthing": 0, "name": "egg"}, + {"color": [230, 230, 230], "id": 128, "isthing": 0, "name": "electric fan"}, + {"color": [4, 250, 7], "id": 129, "isthing": 0, "name": "electric iron"}, + {"color": [224, 5, 255], "id": 130, "isthing": 0, "name": "electric pot"}, + {"color": [235, 255, 7], "id": 131, "isthing": 0, "name": "electric saw"}, + {"color": [150, 5, 61], "id": 132, "isthing": 0, "name": "electronic keyboard"}, + {"color": [120, 120, 70], "id": 133, "isthing": 0, "name": "engine"}, + {"color": [8, 255, 51], "id": 134, "isthing": 0, "name": "envelope"}, + {"color": [255, 6, 82], "id": 135, "isthing": 0, "name": "equipment"}, + {"color": [143, 255, 140], "id": 136, "isthing": 0, "name": "escalator"}, + {"color": [204, 255, 4], "id": 137, "isthing": 0, "name": "exhibition booth"}, + {"color": [255, 51, 7], "id": 138, "isthing": 0, "name": "extinguisher"}, + {"color": [204, 70, 3], "id": 139, "isthing": 0, "name": "eyeglass"}, + {"color": [0, 102, 200], "id": 140, "isthing": 0, "name": "fan"}, + {"color": [61, 230, 250], "id": 141, "isthing": 0, "name": "faucet"}, + {"color": [255, 6, 51], "id": 142, "isthing": 0, "name": "fax machine"}, + {"color": [11, 102, 255], "id": 143, "isthing": 0, "name": "fence"}, + {"color": [255, 7, 71], "id": 144, "isthing": 0, "name": "ferris wheel"}, + {"color": [255, 9, 224], "id": 145, "isthing": 0, "name": "fire extinguisher"}, + {"color": [9, 7, 230], "id": 146, "isthing": 0, "name": "fire hydrant"}, + {"color": [220, 220, 220], "id": 147, "isthing": 0, "name": "fire place"}, + {"color": [255, 9, 92], "id": 148, "isthing": 0, "name": "fish"}, + {"color": [112, 9, 255], "id": 149, "isthing": 0, "name": "fish tank"}, + {"color": [8, 255, 214], "id": 150, "isthing": 0, "name": "fishbowl"}, + {"color": [7, 255, 224], "id": 151, "isthing": 0, "name": "fishing net"}, + {"color": [255, 184, 6], "id": 152, "isthing": 0, "name": "fishing pole"}, + {"color": [10, 255, 71], "id": 153, "isthing": 0, "name": "flag"}, + {"color": [255, 41, 10], "id": 154, "isthing": 0, "name": "flagstaff"}, + {"color": [7, 255, 255], "id": 155, "isthing": 0, "name": "flame"}, + {"color": [224, 255, 8], "id": 156, "isthing": 0, "name": "flashlight"}, + {"color": [102, 8, 255], "id": 157, "isthing": 0, "name": "floor"}, + {"color": [255, 61, 6], "id": 158, "isthing": 0, "name": "flower"}, + {"color": [255, 194, 7], "id": 159, "isthing": 0, "name": "fly"}, + {"color": [255, 122, 8], "id": 160, "isthing": 0, "name": "foam"}, + {"color": [0, 255, 20], "id": 161, "isthing": 0, "name": "food"}, + {"color": [255, 8, 41], "id": 162, "isthing": 0, "name": "footbridge"}, + {"color": [255, 5, 153], "id": 163, "isthing": 0, "name": "forceps"}, + {"color": [6, 51, 255], "id": 164, "isthing": 0, "name": "fork"}, + {"color": [235, 12, 255], "id": 165, "isthing": 0, "name": "forklift"}, + {"color": [160, 150, 20], "id": 166, "isthing": 0, "name": "fountain"}, + {"color": [0, 163, 255], "id": 167, "isthing": 0, "name": "fox"}, + {"color": [140, 140, 140], "id": 168, "isthing": 0, "name": "frame"}, + {"color": [250, 10, 15], "id": 169, "isthing": 0, "name": "fridge"}, + {"color": [20, 255, 0], "id": 170, "isthing": 0, "name": "frog"}, + {"color": [31, 255, 0], "id": 171, "isthing": 0, "name": "fruit"}, + {"color": [255, 31, 0], "id": 172, "isthing": 0, "name": "funnel"}, + {"color": [255, 224, 0], "id": 173, "isthing": 0, "name": "furnace"}, + {"color": [153, 255, 0], "id": 174, "isthing": 0, "name": "game controller"}, + {"color": [0, 0, 255], "id": 175, 
"isthing": 0, "name": "game machine"}, + {"color": [255, 71, 0], "id": 176, "isthing": 0, "name": "gas cylinder"}, + {"color": [0, 235, 255], "id": 177, "isthing": 0, "name": "gas hood"}, + {"color": [0, 173, 255], "id": 178, "isthing": 0, "name": "gas stove"}, + {"color": [31, 0, 255], "id": 179, "isthing": 0, "name": "gift box"}, + {"color": [120, 120, 120], "id": 180, "isthing": 0, "name": "glass"}, + {"color": [180, 120, 120], "id": 181, "isthing": 0, "name": "glass marble"}, + {"color": [6, 230, 230], "id": 182, "isthing": 0, "name": "globe"}, + {"color": [80, 50, 50], "id": 183, "isthing": 0, "name": "glove"}, + {"color": [4, 200, 3], "id": 184, "isthing": 0, "name": "goal"}, + {"color": [120, 120, 80], "id": 185, "isthing": 0, "name": "grandstand"}, + {"color": [140, 140, 140], "id": 186, "isthing": 0, "name": "grass"}, + {"color": [204, 5, 255], "id": 187, "isthing": 0, "name": "gravestone"}, + {"color": [230, 230, 230], "id": 188, "isthing": 0, "name": "ground"}, + {"color": [4, 250, 7], "id": 189, "isthing": 0, "name": "guardrail"}, + {"color": [224, 5, 255], "id": 190, "isthing": 0, "name": "guitar"}, + {"color": [235, 255, 7], "id": 191, "isthing": 0, "name": "gun"}, + {"color": [150, 5, 61], "id": 192, "isthing": 0, "name": "hammer"}, + {"color": [120, 120, 70], "id": 193, "isthing": 0, "name": "hand cart"}, + {"color": [8, 255, 51], "id": 194, "isthing": 0, "name": "handle"}, + {"color": [255, 6, 82], "id": 195, "isthing": 0, "name": "handrail"}, + {"color": [143, 255, 140], "id": 196, "isthing": 0, "name": "hanger"}, + {"color": [204, 255, 4], "id": 197, "isthing": 0, "name": "hard disk drive"}, + {"color": [255, 51, 7], "id": 198, "isthing": 0, "name": "hat"}, + {"color": [204, 70, 3], "id": 199, "isthing": 0, "name": "hay"}, + {"color": [0, 102, 200], "id": 200, "isthing": 0, "name": "headphone"}, + {"color": [61, 230, 250], "id": 201, "isthing": 0, "name": "heater"}, + {"color": [255, 6, 51], "id": 202, "isthing": 0, "name": "helicopter"}, + {"color": [11, 102, 255], "id": 203, "isthing": 0, "name": "helmet"}, + {"color": [255, 7, 71], "id": 204, "isthing": 0, "name": "holder"}, + {"color": [255, 9, 224], "id": 205, "isthing": 0, "name": "hook"}, + {"color": [9, 7, 230], "id": 206, "isthing": 0, "name": "horse"}, + {"color": [220, 220, 220], "id": 207, "isthing": 0, "name": "horse-drawn carriage"}, + {"color": [255, 9, 92], "id": 208, "isthing": 0, "name": "hot-air balloon"}, + {"color": [112, 9, 255], "id": 209, "isthing": 0, "name": "hydrovalve"}, + {"color": [8, 255, 214], "id": 210, "isthing": 0, "name": "ice"}, + {"color": [7, 255, 224], "id": 211, "isthing": 0, "name": "inflator pump"}, + {"color": [255, 184, 6], "id": 212, "isthing": 0, "name": "ipod"}, + {"color": [10, 255, 71], "id": 213, "isthing": 0, "name": "iron"}, + {"color": [255, 41, 10], "id": 214, "isthing": 0, "name": "ironing board"}, + {"color": [7, 255, 255], "id": 215, "isthing": 0, "name": "jar"}, + {"color": [224, 255, 8], "id": 216, "isthing": 0, "name": "kart"}, + {"color": [102, 8, 255], "id": 217, "isthing": 0, "name": "kettle"}, + {"color": [255, 61, 6], "id": 218, "isthing": 0, "name": "key"}, + {"color": [255, 194, 7], "id": 219, "isthing": 0, "name": "keyboard"}, + {"color": [255, 122, 8], "id": 220, "isthing": 0, "name": "kitchen range"}, + {"color": [0, 255, 20], "id": 221, "isthing": 0, "name": "kite"}, + {"color": [255, 8, 41], "id": 222, "isthing": 0, "name": "knife"}, + {"color": [255, 5, 153], "id": 223, "isthing": 0, "name": "knife block"}, + {"color": [6, 51, 255], "id": 224, 
"isthing": 0, "name": "ladder"}, + {"color": [235, 12, 255], "id": 225, "isthing": 0, "name": "ladder truck"}, + {"color": [160, 150, 20], "id": 226, "isthing": 0, "name": "ladle"}, + {"color": [0, 163, 255], "id": 227, "isthing": 0, "name": "laptop"}, + {"color": [140, 140, 140], "id": 228, "isthing": 0, "name": "leaves"}, + {"color": [250, 10, 15], "id": 229, "isthing": 0, "name": "lid"}, + {"color": [20, 255, 0], "id": 230, "isthing": 0, "name": "life buoy"}, + {"color": [31, 255, 0], "id": 231, "isthing": 0, "name": "light"}, + {"color": [255, 31, 0], "id": 232, "isthing": 0, "name": "light bulb"}, + {"color": [255, 224, 0], "id": 233, "isthing": 0, "name": "lighter"}, + {"color": [153, 255, 0], "id": 234, "isthing": 0, "name": "line"}, + {"color": [0, 0, 255], "id": 235, "isthing": 0, "name": "lion"}, + {"color": [255, 71, 0], "id": 236, "isthing": 0, "name": "lobster"}, + {"color": [0, 235, 255], "id": 237, "isthing": 0, "name": "lock"}, + {"color": [0, 173, 255], "id": 238, "isthing": 0, "name": "machine"}, + {"color": [31, 0, 255], "id": 239, "isthing": 0, "name": "mailbox"}, + {"color": [120, 120, 120], "id": 240, "isthing": 0, "name": "mannequin"}, + {"color": [180, 120, 120], "id": 241, "isthing": 0, "name": "map"}, + {"color": [6, 230, 230], "id": 242, "isthing": 0, "name": "mask"}, + {"color": [80, 50, 50], "id": 243, "isthing": 0, "name": "mat"}, + {"color": [4, 200, 3], "id": 244, "isthing": 0, "name": "match book"}, + {"color": [120, 120, 80], "id": 245, "isthing": 0, "name": "mattress"}, + {"color": [140, 140, 140], "id": 246, "isthing": 0, "name": "menu"}, + {"color": [204, 5, 255], "id": 247, "isthing": 0, "name": "metal"}, + {"color": [230, 230, 230], "id": 248, "isthing": 0, "name": "meter box"}, + {"color": [4, 250, 7], "id": 249, "isthing": 0, "name": "microphone"}, + {"color": [224, 5, 255], "id": 250, "isthing": 0, "name": "microwave"}, + {"color": [235, 255, 7], "id": 251, "isthing": 0, "name": "mirror"}, + {"color": [150, 5, 61], "id": 252, "isthing": 0, "name": "missile"}, + {"color": [120, 120, 70], "id": 253, "isthing": 0, "name": "model"}, + {"color": [8, 255, 51], "id": 254, "isthing": 0, "name": "money"}, + {"color": [255, 6, 82], "id": 255, "isthing": 0, "name": "monkey"}, + {"color": [143, 255, 140], "id": 256, "isthing": 0, "name": "mop"}, + {"color": [204, 255, 4], "id": 257, "isthing": 0, "name": "motorbike"}, + {"color": [255, 51, 7], "id": 258, "isthing": 0, "name": "mountain"}, + {"color": [204, 70, 3], "id": 259, "isthing": 0, "name": "mouse"}, + {"color": [0, 102, 200], "id": 260, "isthing": 0, "name": "mouse pad"}, + {"color": [61, 230, 250], "id": 261, "isthing": 0, "name": "musical instrument"}, + {"color": [255, 6, 51], "id": 262, "isthing": 0, "name": "napkin"}, + {"color": [11, 102, 255], "id": 263, "isthing": 0, "name": "net"}, + {"color": [255, 7, 71], "id": 264, "isthing": 0, "name": "newspaper"}, + {"color": [255, 9, 224], "id": 265, "isthing": 0, "name": "oar"}, + {"color": [9, 7, 230], "id": 266, "isthing": 0, "name": "ornament"}, + {"color": [220, 220, 220], "id": 267, "isthing": 0, "name": "outlet"}, + {"color": [255, 9, 92], "id": 268, "isthing": 0, "name": "oven"}, + {"color": [112, 9, 255], "id": 269, "isthing": 0, "name": "oxygen bottle"}, + {"color": [8, 255, 214], "id": 270, "isthing": 0, "name": "pack"}, + {"color": [7, 255, 224], "id": 271, "isthing": 0, "name": "pan"}, + {"color": [255, 184, 6], "id": 272, "isthing": 0, "name": "paper"}, + {"color": [10, 255, 71], "id": 273, "isthing": 0, "name": "paper box"}, + {"color": 
[255, 41, 10], "id": 274, "isthing": 0, "name": "paper cutter"}, + {"color": [7, 255, 255], "id": 275, "isthing": 0, "name": "parachute"}, + {"color": [224, 255, 8], "id": 276, "isthing": 0, "name": "parasol"}, + {"color": [102, 8, 255], "id": 277, "isthing": 0, "name": "parterre"}, + {"color": [255, 61, 6], "id": 278, "isthing": 0, "name": "patio"}, + {"color": [255, 194, 7], "id": 279, "isthing": 0, "name": "pelage"}, + {"color": [255, 122, 8], "id": 280, "isthing": 0, "name": "pen"}, + {"color": [0, 255, 20], "id": 281, "isthing": 0, "name": "pen container"}, + {"color": [255, 8, 41], "id": 282, "isthing": 0, "name": "pencil"}, + {"color": [255, 5, 153], "id": 283, "isthing": 0, "name": "person"}, + {"color": [6, 51, 255], "id": 284, "isthing": 0, "name": "photo"}, + {"color": [235, 12, 255], "id": 285, "isthing": 0, "name": "piano"}, + {"color": [160, 150, 20], "id": 286, "isthing": 0, "name": "picture"}, + {"color": [0, 163, 255], "id": 287, "isthing": 0, "name": "pig"}, + {"color": [140, 140, 140], "id": 288, "isthing": 0, "name": "pillar"}, + {"color": [250, 10, 15], "id": 289, "isthing": 0, "name": "pillow"}, + {"color": [20, 255, 0], "id": 290, "isthing": 0, "name": "pipe"}, + {"color": [31, 255, 0], "id": 291, "isthing": 0, "name": "pitcher"}, + {"color": [255, 31, 0], "id": 292, "isthing": 0, "name": "plant"}, + {"color": [255, 224, 0], "id": 293, "isthing": 0, "name": "plastic"}, + {"color": [153, 255, 0], "id": 294, "isthing": 0, "name": "plate"}, + {"color": [0, 0, 255], "id": 295, "isthing": 0, "name": "platform"}, + {"color": [255, 71, 0], "id": 296, "isthing": 0, "name": "player"}, + {"color": [0, 235, 255], "id": 297, "isthing": 0, "name": "playground"}, + {"color": [0, 173, 255], "id": 298, "isthing": 0, "name": "pliers"}, + {"color": [31, 0, 255], "id": 299, "isthing": 0, "name": "plume"}, + {"color": [120, 120, 120], "id": 300, "isthing": 0, "name": "poker"}, + {"color": [180, 120, 120], "id": 301, "isthing": 0, "name": "poker chip"}, + {"color": [6, 230, 230], "id": 302, "isthing": 0, "name": "pole"}, + {"color": [80, 50, 50], "id": 303, "isthing": 0, "name": "pool table"}, + {"color": [4, 200, 3], "id": 304, "isthing": 0, "name": "postcard"}, + {"color": [120, 120, 80], "id": 305, "isthing": 0, "name": "poster"}, + {"color": [140, 140, 140], "id": 306, "isthing": 0, "name": "pot"}, + {"color": [204, 5, 255], "id": 307, "isthing": 0, "name": "pottedplant"}, + {"color": [230, 230, 230], "id": 308, "isthing": 0, "name": "printer"}, + {"color": [4, 250, 7], "id": 309, "isthing": 0, "name": "projector"}, + {"color": [224, 5, 255], "id": 310, "isthing": 0, "name": "pumpkin"}, + {"color": [235, 255, 7], "id": 311, "isthing": 0, "name": "rabbit"}, + {"color": [150, 5, 61], "id": 312, "isthing": 0, "name": "racket"}, + {"color": [120, 120, 70], "id": 313, "isthing": 0, "name": "radiator"}, + {"color": [8, 255, 51], "id": 314, "isthing": 0, "name": "radio"}, + {"color": [255, 6, 82], "id": 315, "isthing": 0, "name": "rail"}, + {"color": [143, 255, 140], "id": 316, "isthing": 0, "name": "rake"}, + {"color": [204, 255, 4], "id": 317, "isthing": 0, "name": "ramp"}, + {"color": [255, 51, 7], "id": 318, "isthing": 0, "name": "range hood"}, + {"color": [204, 70, 3], "id": 319, "isthing": 0, "name": "receiver"}, + {"color": [0, 102, 200], "id": 320, "isthing": 0, "name": "recorder"}, + {"color": [61, 230, 250], "id": 321, "isthing": 0, "name": "recreational machines"}, + {"color": [255, 6, 51], "id": 322, "isthing": 0, "name": "remote control"}, + {"color": [11, 102, 255], "id": 323, 
"isthing": 0, "name": "road"}, + {"color": [255, 7, 71], "id": 324, "isthing": 0, "name": "robot"}, + {"color": [255, 9, 224], "id": 325, "isthing": 0, "name": "rock"}, + {"color": [9, 7, 230], "id": 326, "isthing": 0, "name": "rocket"}, + {"color": [220, 220, 220], "id": 327, "isthing": 0, "name": "rocking horse"}, + {"color": [255, 9, 92], "id": 328, "isthing": 0, "name": "rope"}, + {"color": [112, 9, 255], "id": 329, "isthing": 0, "name": "rug"}, + {"color": [8, 255, 214], "id": 330, "isthing": 0, "name": "ruler"}, + {"color": [7, 255, 224], "id": 331, "isthing": 0, "name": "runway"}, + {"color": [255, 184, 6], "id": 332, "isthing": 0, "name": "saddle"}, + {"color": [10, 255, 71], "id": 333, "isthing": 0, "name": "sand"}, + {"color": [255, 41, 10], "id": 334, "isthing": 0, "name": "saw"}, + {"color": [7, 255, 255], "id": 335, "isthing": 0, "name": "scale"}, + {"color": [224, 255, 8], "id": 336, "isthing": 0, "name": "scanner"}, + {"color": [102, 8, 255], "id": 337, "isthing": 0, "name": "scissors"}, + {"color": [255, 61, 6], "id": 338, "isthing": 0, "name": "scoop"}, + {"color": [255, 194, 7], "id": 339, "isthing": 0, "name": "screen"}, + {"color": [255, 122, 8], "id": 340, "isthing": 0, "name": "screwdriver"}, + {"color": [0, 255, 20], "id": 341, "isthing": 0, "name": "sculpture"}, + {"color": [255, 8, 41], "id": 342, "isthing": 0, "name": "scythe"}, + {"color": [255, 5, 153], "id": 343, "isthing": 0, "name": "sewer"}, + {"color": [6, 51, 255], "id": 344, "isthing": 0, "name": "sewing machine"}, + {"color": [235, 12, 255], "id": 345, "isthing": 0, "name": "shed"}, + {"color": [160, 150, 20], "id": 346, "isthing": 0, "name": "sheep"}, + {"color": [0, 163, 255], "id": 347, "isthing": 0, "name": "shell"}, + {"color": [140, 140, 140], "id": 348, "isthing": 0, "name": "shelves"}, + {"color": [250, 10, 15], "id": 349, "isthing": 0, "name": "shoe"}, + {"color": [20, 255, 0], "id": 350, "isthing": 0, "name": "shopping cart"}, + {"color": [31, 255, 0], "id": 351, "isthing": 0, "name": "shovel"}, + {"color": [255, 31, 0], "id": 352, "isthing": 0, "name": "sidecar"}, + {"color": [255, 224, 0], "id": 353, "isthing": 0, "name": "sidewalk"}, + {"color": [153, 255, 0], "id": 354, "isthing": 0, "name": "sign"}, + {"color": [0, 0, 255], "id": 355, "isthing": 0, "name": "signal light"}, + {"color": [255, 71, 0], "id": 356, "isthing": 0, "name": "sink"}, + {"color": [0, 235, 255], "id": 357, "isthing": 0, "name": "skateboard"}, + {"color": [0, 173, 255], "id": 358, "isthing": 0, "name": "ski"}, + {"color": [31, 0, 255], "id": 359, "isthing": 0, "name": "sky"}, + {"color": [120, 120, 120], "id": 360, "isthing": 0, "name": "sled"}, + {"color": [180, 120, 120], "id": 361, "isthing": 0, "name": "slippers"}, + {"color": [6, 230, 230], "id": 362, "isthing": 0, "name": "smoke"}, + {"color": [80, 50, 50], "id": 363, "isthing": 0, "name": "snail"}, + {"color": [4, 200, 3], "id": 364, "isthing": 0, "name": "snake"}, + {"color": [120, 120, 80], "id": 365, "isthing": 0, "name": "snow"}, + {"color": [140, 140, 140], "id": 366, "isthing": 0, "name": "snowmobiles"}, + {"color": [204, 5, 255], "id": 367, "isthing": 0, "name": "sofa"}, + {"color": [230, 230, 230], "id": 368, "isthing": 0, "name": "spanner"}, + {"color": [4, 250, 7], "id": 369, "isthing": 0, "name": "spatula"}, + {"color": [224, 5, 255], "id": 370, "isthing": 0, "name": "speaker"}, + {"color": [235, 255, 7], "id": 371, "isthing": 0, "name": "speed bump"}, + {"color": [150, 5, 61], "id": 372, "isthing": 0, "name": "spice container"}, + {"color": [120, 120, 
70], "id": 373, "isthing": 0, "name": "spoon"}, + {"color": [8, 255, 51], "id": 374, "isthing": 0, "name": "sprayer"}, + {"color": [255, 6, 82], "id": 375, "isthing": 0, "name": "squirrel"}, + {"color": [143, 255, 140], "id": 376, "isthing": 0, "name": "stage"}, + {"color": [204, 255, 4], "id": 377, "isthing": 0, "name": "stair"}, + {"color": [255, 51, 7], "id": 378, "isthing": 0, "name": "stapler"}, + {"color": [204, 70, 3], "id": 379, "isthing": 0, "name": "stick"}, + {"color": [0, 102, 200], "id": 380, "isthing": 0, "name": "sticky note"}, + {"color": [61, 230, 250], "id": 381, "isthing": 0, "name": "stone"}, + {"color": [255, 6, 51], "id": 382, "isthing": 0, "name": "stool"}, + {"color": [11, 102, 255], "id": 383, "isthing": 0, "name": "stove"}, + {"color": [255, 7, 71], "id": 384, "isthing": 0, "name": "straw"}, + {"color": [255, 9, 224], "id": 385, "isthing": 0, "name": "stretcher"}, + {"color": [9, 7, 230], "id": 386, "isthing": 0, "name": "sun"}, + {"color": [220, 220, 220], "id": 387, "isthing": 0, "name": "sunglass"}, + {"color": [255, 9, 92], "id": 388, "isthing": 0, "name": "sunshade"}, + {"color": [112, 9, 255], "id": 389, "isthing": 0, "name": "surveillance camera"}, + {"color": [8, 255, 214], "id": 390, "isthing": 0, "name": "swan"}, + {"color": [7, 255, 224], "id": 391, "isthing": 0, "name": "sweeper"}, + {"color": [255, 184, 6], "id": 392, "isthing": 0, "name": "swim ring"}, + {"color": [10, 255, 71], "id": 393, "isthing": 0, "name": "swimming pool"}, + {"color": [255, 41, 10], "id": 394, "isthing": 0, "name": "swing"}, + {"color": [7, 255, 255], "id": 395, "isthing": 0, "name": "switch"}, + {"color": [224, 255, 8], "id": 396, "isthing": 0, "name": "table"}, + {"color": [102, 8, 255], "id": 397, "isthing": 0, "name": "tableware"}, + {"color": [255, 61, 6], "id": 398, "isthing": 0, "name": "tank"}, + {"color": [255, 194, 7], "id": 399, "isthing": 0, "name": "tap"}, + {"color": [255, 122, 8], "id": 400, "isthing": 0, "name": "tape"}, + {"color": [0, 255, 20], "id": 401, "isthing": 0, "name": "tarp"}, + {"color": [255, 8, 41], "id": 402, "isthing": 0, "name": "telephone"}, + {"color": [255, 5, 153], "id": 403, "isthing": 0, "name": "telephone booth"}, + {"color": [6, 51, 255], "id": 404, "isthing": 0, "name": "tent"}, + {"color": [235, 12, 255], "id": 405, "isthing": 0, "name": "tire"}, + {"color": [160, 150, 20], "id": 406, "isthing": 0, "name": "toaster"}, + {"color": [0, 163, 255], "id": 407, "isthing": 0, "name": "toilet"}, + {"color": [140, 140, 140], "id": 408, "isthing": 0, "name": "tong"}, + {"color": [250, 10, 15], "id": 409, "isthing": 0, "name": "tool"}, + {"color": [20, 255, 0], "id": 410, "isthing": 0, "name": "toothbrush"}, + {"color": [31, 255, 0], "id": 411, "isthing": 0, "name": "towel"}, + {"color": [255, 31, 0], "id": 412, "isthing": 0, "name": "toy"}, + {"color": [255, 224, 0], "id": 413, "isthing": 0, "name": "toy car"}, + {"color": [153, 255, 0], "id": 414, "isthing": 0, "name": "track"}, + {"color": [0, 0, 255], "id": 415, "isthing": 0, "name": "train"}, + {"color": [255, 71, 0], "id": 416, "isthing": 0, "name": "trampoline"}, + {"color": [0, 235, 255], "id": 417, "isthing": 0, "name": "trash bin"}, + {"color": [0, 173, 255], "id": 418, "isthing": 0, "name": "tray"}, + {"color": [31, 0, 255], "id": 419, "isthing": 0, "name": "tree"}, + {"color": [120, 120, 120], "id": 420, "isthing": 0, "name": "tricycle"}, + {"color": [180, 120, 120], "id": 421, "isthing": 0, "name": "tripod"}, + {"color": [6, 230, 230], "id": 422, "isthing": 0, "name": "trophy"}, + 
{"color": [80, 50, 50], "id": 423, "isthing": 0, "name": "truck"}, + {"color": [4, 200, 3], "id": 424, "isthing": 0, "name": "tube"}, + {"color": [120, 120, 80], "id": 425, "isthing": 0, "name": "turtle"}, + {"color": [140, 140, 140], "id": 426, "isthing": 0, "name": "tvmonitor"}, + {"color": [204, 5, 255], "id": 427, "isthing": 0, "name": "tweezers"}, + {"color": [230, 230, 230], "id": 428, "isthing": 0, "name": "typewriter"}, + {"color": [4, 250, 7], "id": 429, "isthing": 0, "name": "umbrella"}, + {"color": [224, 5, 255], "id": 430, "isthing": 0, "name": "unknown"}, + {"color": [235, 255, 7], "id": 431, "isthing": 0, "name": "vacuum cleaner"}, + {"color": [150, 5, 61], "id": 432, "isthing": 0, "name": "vending machine"}, + {"color": [120, 120, 70], "id": 433, "isthing": 0, "name": "video camera"}, + {"color": [8, 255, 51], "id": 434, "isthing": 0, "name": "video game console"}, + {"color": [255, 6, 82], "id": 435, "isthing": 0, "name": "video player"}, + {"color": [143, 255, 140], "id": 436, "isthing": 0, "name": "video tape"}, + {"color": [204, 255, 4], "id": 437, "isthing": 0, "name": "violin"}, + {"color": [255, 51, 7], "id": 438, "isthing": 0, "name": "wakeboard"}, + {"color": [204, 70, 3], "id": 439, "isthing": 0, "name": "wall"}, + {"color": [0, 102, 200], "id": 440, "isthing": 0, "name": "wallet"}, + {"color": [61, 230, 250], "id": 441, "isthing": 0, "name": "wardrobe"}, + {"color": [255, 6, 51], "id": 442, "isthing": 0, "name": "washing machine"}, + {"color": [11, 102, 255], "id": 443, "isthing": 0, "name": "watch"}, + {"color": [255, 7, 71], "id": 444, "isthing": 0, "name": "water"}, + {"color": [255, 9, 224], "id": 445, "isthing": 0, "name": "water dispenser"}, + {"color": [9, 7, 230], "id": 446, "isthing": 0, "name": "water pipe"}, + {"color": [220, 220, 220], "id": 447, "isthing": 0, "name": "water skate board"}, + {"color": [255, 9, 92], "id": 448, "isthing": 0, "name": "watermelon"}, + {"color": [112, 9, 255], "id": 449, "isthing": 0, "name": "whale"}, + {"color": [8, 255, 214], "id": 450, "isthing": 0, "name": "wharf"}, + {"color": [7, 255, 224], "id": 451, "isthing": 0, "name": "wheel"}, + {"color": [255, 184, 6], "id": 452, "isthing": 0, "name": "wheelchair"}, + {"color": [10, 255, 71], "id": 453, "isthing": 0, "name": "window"}, + {"color": [255, 41, 10], "id": 454, "isthing": 0, "name": "window blinds"}, + {"color": [7, 255, 255], "id": 455, "isthing": 0, "name": "wineglass"}, + {"color": [224, 255, 8], "id": 456, "isthing": 0, "name": "wire"}, + {"color": [102, 8, 255], "id": 457, "isthing": 0, "name": "wood"}, + {"color": [255, 61, 6], "id": 458, "isthing": 0, "name": "wool"}, +] + +PASCAL_CTX_59_CATEGORIES = [ + {"color": [180, 120, 120], "id": 0, "isthing": 0, "name": "aeroplane"}, + {"color": [6, 230, 230], "id": 1, "isthing": 0, "name": "bag"}, + {"color": [80, 50, 50], "id": 2, "isthing": 0, "name": "bed"}, + {"color": [4, 200, 3], "id": 3, "isthing": 0, "name": "bedclothes"}, + {"color": [120, 120, 80], "id": 4, "isthing": 0, "name": "bench"}, + {"color": [140, 140, 140], "id": 5, "isthing": 0, "name": "bicycle"}, + {"color": [204, 5, 255], "id": 6, "isthing": 0, "name": "bird"}, + {"color": [230, 230, 230], "id": 7, "isthing": 0, "name": "boat"}, + {"color": [4, 250, 7], "id": 8, "isthing": 0, "name": "book"}, + {"color": [224, 5, 255], "id": 9, "isthing": 0, "name": "bottle"}, + {"color": [235, 255, 7], "id": 10, "isthing": 0, "name": "building"}, + {"color": [150, 5, 61], "id": 11, "isthing": 0, "name": "bus"}, + {"color": [120, 120, 70], "id": 12, 
"isthing": 0, "name": "cabinet"}, + {"color": [8, 255, 51], "id": 13, "isthing": 0, "name": "car"}, + {"color": [255, 6, 82], "id": 14, "isthing": 0, "name": "cat"}, + {"color": [143, 255, 140], "id": 15, "isthing": 0, "name": "ceiling"}, + {"color": [204, 255, 4], "id": 16, "isthing": 0, "name": "chair"}, + {"color": [255, 51, 7], "id": 17, "isthing": 0, "name": "cloth"}, + {"color": [204, 70, 3], "id": 18, "isthing": 0, "name": "computer"}, + {"color": [0, 102, 200], "id": 19, "isthing": 0, "name": "cow"}, + {"color": [61, 230, 250], "id": 20, "isthing": 0, "name": "cup"}, + {"color": [255, 6, 51], "id": 21, "isthing": 0, "name": "curtain"}, + {"color": [11, 102, 255], "id": 22, "isthing": 0, "name": "dog"}, + {"color": [255, 7, 71], "id": 23, "isthing": 0, "name": "door"}, + {"color": [255, 9, 224], "id": 24, "isthing": 0, "name": "fence"}, + {"color": [9, 7, 230], "id": 25, "isthing": 0, "name": "floor"}, + {"color": [220, 220, 220], "id": 26, "isthing": 0, "name": "flower"}, + {"color": [255, 9, 92], "id": 27, "isthing": 0, "name": "food"}, + {"color": [112, 9, 255], "id": 28, "isthing": 0, "name": "grass"}, + {"color": [8, 255, 214], "id": 29, "isthing": 0, "name": "ground"}, + {"color": [7, 255, 224], "id": 30, "isthing": 0, "name": "horse"}, + {"color": [255, 184, 6], "id": 31, "isthing": 0, "name": "keyboard"}, + {"color": [10, 255, 71], "id": 32, "isthing": 0, "name": "light"}, + {"color": [255, 41, 10], "id": 33, "isthing": 0, "name": "motorbike"}, + {"color": [7, 255, 255], "id": 34, "isthing": 0, "name": "mountain"}, + {"color": [224, 255, 8], "id": 35, "isthing": 0, "name": "mouse"}, + {"color": [102, 8, 255], "id": 36, "isthing": 0, "name": "person"}, + {"color": [255, 61, 6], "id": 37, "isthing": 0, "name": "plate"}, + {"color": [255, 194, 7], "id": 38, "isthing": 0, "name": "platform"}, + {"color": [255, 122, 8], "id": 39, "isthing": 0, "name": "pottedplant"}, + {"color": [0, 255, 20], "id": 40, "isthing": 0, "name": "road"}, + {"color": [255, 8, 41], "id": 41, "isthing": 0, "name": "rock"}, + {"color": [255, 5, 153], "id": 42, "isthing": 0, "name": "sheep"}, + {"color": [6, 51, 255], "id": 43, "isthing": 0, "name": "shelves"}, + {"color": [235, 12, 255], "id": 44, "isthing": 0, "name": "sidewalk"}, + {"color": [160, 150, 20], "id": 45, "isthing": 0, "name": "sign"}, + {"color": [0, 163, 255], "id": 46, "isthing": 0, "name": "sky"}, + {"color": [140, 140, 140], "id": 47, "isthing": 0, "name": "snow"}, + {"color": [250, 10, 15], "id": 48, "isthing": 0, "name": "sofa"}, + {"color": [20, 255, 0], "id": 49, "isthing": 0, "name": "diningtable"}, + {"color": [31, 255, 0], "id": 50, "isthing": 0, "name": "track"}, + {"color": [255, 31, 0], "id": 51, "isthing": 0, "name": "train"}, + {"color": [255, 224, 0], "id": 52, "isthing": 0, "name": "tree"}, + {"color": [153, 255, 0], "id": 53, "isthing": 0, "name": "truck"}, + {"color": [0, 0, 255], "id": 54, "isthing": 0, "name": "tvmonitor"}, + {"color": [255, 71, 0], "id": 55, "isthing": 0, "name": "wall"}, + {"color": [0, 235, 255], "id": 56, "isthing": 0, "name": "water"}, + {"color": [0, 173, 255], "id": 57, "isthing": 0, "name": "window"}, + {"color": [31, 0, 255], "id": 58, "isthing": 0, "name": "wood"}, +] + +MAPILLARY_VISTAS_SEM_SEG_CATEGORIES = [ + {'color': [165, 42, 42], + 'id': 1, + 'isthing': 1, + 'name': 'Bird', + 'supercategory': 'animal--bird'}, + {'color': [0, 192, 0], + 'id': 2, + 'isthing': 1, + 'name': 'Ground Animal', + 'supercategory': 'animal--ground-animal'}, + {'color': [196, 196, 196], + 'id': 3, + 'isthing': 0, 
+ 'name': 'Curb', + 'supercategory': 'construction--barrier--curb'}, + {'color': [190, 153, 153], + 'id': 4, + 'isthing': 0, + 'name': 'Fence', + 'supercategory': 'construction--barrier--fence'}, + {'color': [180, 165, 180], + 'id': 5, + 'isthing': 0, + 'name': 'Guard Rail', + 'supercategory': 'construction--barrier--guard-rail'}, + {'color': [90, 120, 150], + 'id': 6, + 'isthing': 0, + 'name': 'Barrier', + 'supercategory': 'construction--barrier--other-barrier'}, + {'color': [102, 102, 156], + 'id': 7, + 'isthing': 0, + 'name': 'Wall', + 'supercategory': 'construction--barrier--wall'}, + {'color': [128, 64, 255], + 'id': 8, + 'isthing': 0, + 'name': 'Bike Lane', + 'supercategory': 'construction--flat--bike-lane'}, + {'color': [140, 140, 200], + 'id': 9, + 'isthing': 1, + 'name': 'Crosswalk - Plain', + 'supercategory': 'construction--flat--crosswalk-plain'}, + {'color': [170, 170, 170], + 'id': 10, + 'isthing': 0, + 'name': 'Curb Cut', + 'supercategory': 'construction--flat--curb-cut'}, + {'color': [250, 170, 160], + 'id': 11, + 'isthing': 0, + 'name': 'Parking', + 'supercategory': 'construction--flat--parking'}, + {'color': [96, 96, 96], + 'id': 12, + 'isthing': 0, + 'name': 'Pedestrian Area', + 'supercategory': 'construction--flat--pedestrian-area'}, + {'color': [230, 150, 140], + 'id': 13, + 'isthing': 0, + 'name': 'Rail Track', + 'supercategory': 'construction--flat--rail-track'}, + {'color': [128, 64, 128], + 'id': 14, + 'isthing': 0, + 'name': 'Road', + 'supercategory': 'construction--flat--road'}, + {'color': [110, 110, 110], + 'id': 15, + 'isthing': 0, + 'name': 'Service Lane', + 'supercategory': 'construction--flat--service-lane'}, + {'color': [244, 35, 232], + 'id': 16, + 'isthing': 0, + 'name': 'Sidewalk', + 'supercategory': 'construction--flat--sidewalk'}, + {'color': [150, 100, 100], + 'id': 17, + 'isthing': 0, + 'name': 'Bridge', + 'supercategory': 'construction--structure--bridge'}, + {'color': [70, 70, 70], + 'id': 18, + 'isthing': 0, + 'name': 'Building', + 'supercategory': 'construction--structure--building'}, + {'color': [150, 120, 90], + 'id': 19, + 'isthing': 0, + 'name': 'Tunnel', + 'supercategory': 'construction--structure--tunnel'}, + {'color': [220, 20, 60], + 'id': 20, + 'isthing': 1, + 'name': 'Person', + 'supercategory': 'human--person'}, + {'color': [255, 0, 0], + 'id': 21, + 'isthing': 1, + 'name': 'Bicyclist', + 'supercategory': 'human--rider--bicyclist'}, + {'color': [255, 0, 100], + 'id': 22, + 'isthing': 1, + 'name': 'Motorcyclist', + 'supercategory': 'human--rider--motorcyclist'}, + {'color': [255, 0, 200], + 'id': 23, + 'isthing': 1, + 'name': 'Other Rider', + 'supercategory': 'human--rider--other-rider'}, + {'color': [200, 128, 128], + 'id': 24, + 'isthing': 1, + 'name': 'Lane Marking - Crosswalk', + 'supercategory': 'marking--crosswalk-zebra'}, + {'color': [255, 255, 255], + 'id': 25, + 'isthing': 0, + 'name': 'Lane Marking - General', + 'supercategory': 'marking--general'}, + {'color': [64, 170, 64], + 'id': 26, + 'isthing': 0, + 'name': 'Mountain', + 'supercategory': 'nature--mountain'}, + {'color': [230, 160, 50], + 'id': 27, + 'isthing': 0, + 'name': 'Sand', + 'supercategory': 'nature--sand'}, + {'color': [70, 130, 180], + 'id': 28, + 'isthing': 0, + 'name': 'Sky', + 'supercategory': 'nature--sky'}, + {'color': [190, 255, 255], + 'id': 29, + 'isthing': 0, + 'name': 'Snow', + 'supercategory': 'nature--snow'}, + {'color': [152, 251, 152], + 'id': 30, + 'isthing': 0, + 'name': 'Terrain', + 'supercategory': 'nature--terrain'}, + {'color': [107, 142, 
35], + 'id': 31, + 'isthing': 0, + 'name': 'Vegetation', + 'supercategory': 'nature--vegetation'}, + {'color': [0, 170, 30], + 'id': 32, + 'isthing': 0, + 'name': 'Water', + 'supercategory': 'nature--water'}, + {'color': [255, 255, 128], + 'id': 33, + 'isthing': 1, + 'name': 'Banner', + 'supercategory': 'object--banner'}, + {'color': [250, 0, 30], + 'id': 34, + 'isthing': 1, + 'name': 'Bench', + 'supercategory': 'object--bench'}, + {'color': [100, 140, 180], + 'id': 35, + 'isthing': 1, + 'name': 'Bike Rack', + 'supercategory': 'object--bike-rack'}, + {'color': [220, 220, 220], + 'id': 36, + 'isthing': 1, + 'name': 'Billboard', + 'supercategory': 'object--billboard'}, + {'color': [220, 128, 128], + 'id': 37, + 'isthing': 1, + 'name': 'Catch Basin', + 'supercategory': 'object--catch-basin'}, + {'color': [222, 40, 40], + 'id': 38, + 'isthing': 1, + 'name': 'CCTV Camera', + 'supercategory': 'object--cctv-camera'}, + {'color': [100, 170, 30], + 'id': 39, + 'isthing': 1, + 'name': 'Fire Hydrant', + 'supercategory': 'object--fire-hydrant'}, + {'color': [40, 40, 40], + 'id': 40, + 'isthing': 1, + 'name': 'Junction Box', + 'supercategory': 'object--junction-box'}, + {'color': [33, 33, 33], + 'id': 41, + 'isthing': 1, + 'name': 'Mailbox', + 'supercategory': 'object--mailbox'}, + {'color': [100, 128, 160], + 'id': 42, + 'isthing': 1, + 'name': 'Manhole', + 'supercategory': 'object--manhole'}, + {'color': [142, 0, 0], + 'id': 43, + 'isthing': 1, + 'name': 'Phone Booth', + 'supercategory': 'object--phone-booth'}, + {'color': [70, 100, 150], + 'id': 44, + 'isthing': 0, + 'name': 'Pothole', + 'supercategory': 'object--pothole'}, + {'color': [210, 170, 100], + 'id': 45, + 'isthing': 1, + 'name': 'Street Light', + 'supercategory': 'object--street-light'}, + {'color': [153, 153, 153], + 'id': 46, + 'isthing': 1, + 'name': 'Pole', + 'supercategory': 'object--support--pole'}, + {'color': [128, 128, 128], + 'id': 47, + 'isthing': 1, + 'name': 'Traffic Sign Frame', + 'supercategory': 'object--support--traffic-sign-frame'}, + {'color': [0, 0, 80], + 'id': 48, + 'isthing': 1, + 'name': 'Utility Pole', + 'supercategory': 'object--support--utility-pole'}, + {'color': [250, 170, 30], + 'id': 49, + 'isthing': 1, + 'name': 'Traffic Light', + 'supercategory': 'object--traffic-light'}, + {'color': [192, 192, 192], + 'id': 50, + 'isthing': 1, + 'name': 'Traffic Sign (Back)', + 'supercategory': 'object--traffic-sign--back'}, + {'color': [220, 220, 0], + 'id': 51, + 'isthing': 1, + 'name': 'Traffic Sign (Front)', + 'supercategory': 'object--traffic-sign--front'}, + {'color': [140, 140, 20], + 'id': 52, + 'isthing': 1, + 'name': 'Trash Can', + 'supercategory': 'object--trash-can'}, + {'color': [119, 11, 32], + 'id': 53, + 'isthing': 1, + 'name': 'Bicycle', + 'supercategory': 'object--vehicle--bicycle'}, + {'color': [150, 0, 255], + 'id': 54, + 'isthing': 1, + 'name': 'Boat', + 'supercategory': 'object--vehicle--boat'}, + {'color': [0, 60, 100], + 'id': 55, + 'isthing': 1, + 'name': 'Bus', + 'supercategory': 'object--vehicle--bus'}, + {'color': [0, 0, 142], + 'id': 56, + 'isthing': 1, + 'name': 'Car', + 'supercategory': 'object--vehicle--car'}, + {'color': [0, 0, 90], + 'id': 57, + 'isthing': 1, + 'name': 'Caravan', + 'supercategory': 'object--vehicle--caravan'}, + {'color': [0, 0, 230], + 'id': 58, + 'isthing': 1, + 'name': 'Motorcycle', + 'supercategory': 'object--vehicle--motorcycle'}, + {'color': [0, 80, 100], + 'id': 59, + 'isthing': 0, + 'name': 'On Rails', + 'supercategory': 'object--vehicle--on-rails'}, + {'color': 
[128, 64, 64], + 'id': 60, + 'isthing': 1, + 'name': 'Other Vehicle', + 'supercategory': 'object--vehicle--other-vehicle'}, + {'color': [0, 0, 110], + 'id': 61, + 'isthing': 1, + 'name': 'Trailer', + 'supercategory': 'object--vehicle--trailer'}, + {'color': [0, 0, 70], + 'id': 62, + 'isthing': 1, + 'name': 'Truck', + 'supercategory': 'object--vehicle--truck'}, + {'color': [0, 0, 192], + 'id': 63, + 'isthing': 1, + 'name': 'Wheeled Slow', + 'supercategory': 'object--vehicle--wheeled-slow'}, + {'color': [32, 32, 32], + 'id': 64, + 'isthing': 0, + 'name': 'Car Mount', + 'supercategory': 'void--car-mount'}, + {'color': [120, 10, 10], + 'id': 65, + 'isthing': 0, + 'name': 'Ego Vehicle', + 'supercategory': 'void--ego-vehicle'} +] + +COCO_STUFF_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, + {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, + {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, + {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, + {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, + {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, + {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, + {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, + {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, + {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, + {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, + {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, + {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, + {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, + {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, + {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, + {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, + {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, + {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, + {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, + {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, + {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, + {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, + {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, + {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, + {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, + {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, + {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, + {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, + {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, + {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, + {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, + {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, + {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, + {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, + {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, + {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, + {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, + {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, + {"color": [197, 226, 
255], "isthing": 1, "id": 44, "name": "bottle"}, + {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, + {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, + {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, + {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, + {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, + {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, + {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, + {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, + {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, + {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, + {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, + {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, + {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, + {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, + {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, + {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, + {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, + {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, + {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, + {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, + {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, + {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, + {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, + {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, + {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, + {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, + {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, + {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, + {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, + {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, + {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, + {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, + {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, + {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, + {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, + {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, + {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, + {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, + {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, + {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, + {"id": 92, "name": "banner", "supercategory": "textile"}, + {"id": 93, "name": "blanket", "supercategory": "textile"}, + {"id": 94, "name": "branch", "supercategory": "plant"}, + {"id": 95, "name": "bridge", "supercategory": "building"}, + {"id": 96, "name": "building-other", "supercategory": "building"}, + {"id": 97, "name": "bush", "supercategory": "plant"}, + {"id": 98, "name": "cabinet", "supercategory": "furniture-stuff"}, + {"id": 99, "name": "cage", "supercategory": "structural"}, + {"id": 100, "name": "cardboard", "supercategory": "raw-material"}, + {"id": 101, "name": "carpet", "supercategory": "floor"}, + {"id": 102, "name": "ceiling-other", 
"supercategory": "ceiling"}, + {"id": 103, "name": "ceiling-tile", "supercategory": "ceiling"}, + {"id": 104, "name": "cloth", "supercategory": "textile"}, + {"id": 105, "name": "clothes", "supercategory": "textile"}, + {"id": 106, "name": "clouds", "supercategory": "sky"}, + {"id": 107, "name": "counter", "supercategory": "furniture-stuff"}, + {"id": 108, "name": "cupboard", "supercategory": "furniture-stuff"}, + {"id": 109, "name": "curtain", "supercategory": "textile"}, + {"id": 110, "name": "desk-stuff", "supercategory": "furniture-stuff"}, + {"id": 111, "name": "dirt", "supercategory": "ground"}, + {"id": 112, "name": "door-stuff", "supercategory": "furniture-stuff"}, + {"id": 113, "name": "fence", "supercategory": "structural"}, + {"id": 114, "name": "floor-marble", "supercategory": "floor"}, + {"id": 115, "name": "floor-other", "supercategory": "floor"}, + {"id": 116, "name": "floor-stone", "supercategory": "floor"}, + {"id": 117, "name": "floor-tile", "supercategory": "floor"}, + {"id": 118, "name": "floor-wood", "supercategory": "floor"}, + {"id": 119, "name": "flower", "supercategory": "plant"}, + {"id": 120, "name": "fog", "supercategory": "water"}, + {"id": 121, "name": "food-other", "supercategory": "food-stuff"}, + {"id": 122, "name": "fruit", "supercategory": "food-stuff"}, + {"id": 123, "name": "furniture-other", "supercategory": "furniture-stuff"}, + {"id": 124, "name": "grass", "supercategory": "plant"}, + {"id": 125, "name": "gravel", "supercategory": "ground"}, + {"id": 126, "name": "ground-other", "supercategory": "ground"}, + {"id": 127, "name": "hill", "supercategory": "solid"}, + {"id": 128, "name": "house", "supercategory": "building"}, + {"id": 129, "name": "leaves", "supercategory": "plant"}, + {"id": 130, "name": "light", "supercategory": "furniture-stuff"}, + {"id": 131, "name": "mat", "supercategory": "textile"}, + {"id": 132, "name": "metal", "supercategory": "raw-material"}, + {"id": 133, "name": "mirror-stuff", "supercategory": "furniture-stuff"}, + {"id": 134, "name": "moss", "supercategory": "plant"}, + {"id": 135, "name": "mountain", "supercategory": "solid"}, + {"id": 136, "name": "mud", "supercategory": "ground"}, + {"id": 137, "name": "napkin", "supercategory": "textile"}, + {"id": 138, "name": "net", "supercategory": "structural"}, + {"id": 139, "name": "paper", "supercategory": "raw-material"}, + {"id": 140, "name": "pavement", "supercategory": "ground"}, + {"id": 141, "name": "pillow", "supercategory": "textile"}, + {"id": 142, "name": "plant-other", "supercategory": "plant"}, + {"id": 143, "name": "plastic", "supercategory": "raw-material"}, + {"id": 144, "name": "platform", "supercategory": "ground"}, + {"id": 145, "name": "playingfield", "supercategory": "ground"}, + {"id": 146, "name": "railing", "supercategory": "structural"}, + {"id": 147, "name": "railroad", "supercategory": "ground"}, + {"id": 148, "name": "river", "supercategory": "water"}, + {"id": 149, "name": "road", "supercategory": "ground"}, + {"id": 150, "name": "rock", "supercategory": "solid"}, + {"id": 151, "name": "roof", "supercategory": "building"}, + {"id": 152, "name": "rug", "supercategory": "textile"}, + {"id": 153, "name": "salad", "supercategory": "food-stuff"}, + {"id": 154, "name": "sand", "supercategory": "ground"}, + {"id": 155, "name": "sea", "supercategory": "water"}, + {"id": 156, "name": "shelf", "supercategory": "furniture-stuff"}, + {"id": 157, "name": "sky-other", "supercategory": "sky"}, + {"id": 158, "name": "skyscraper", "supercategory": "building"}, + 
{"id": 159, "name": "snow", "supercategory": "ground"}, + {"id": 160, "name": "solid-other", "supercategory": "solid"}, + {"id": 161, "name": "stairs", "supercategory": "furniture-stuff"}, + {"id": 162, "name": "stone", "supercategory": "solid"}, + {"id": 163, "name": "straw", "supercategory": "plant"}, + {"id": 164, "name": "structural-other", "supercategory": "structural"}, + {"id": 165, "name": "table", "supercategory": "furniture-stuff"}, + {"id": 166, "name": "tent", "supercategory": "building"}, + {"id": 167, "name": "textile-other", "supercategory": "textile"}, + {"id": 168, "name": "towel", "supercategory": "textile"}, + {"id": 169, "name": "tree", "supercategory": "plant"}, + {"id": 170, "name": "vegetable", "supercategory": "food-stuff"}, + {"id": 171, "name": "wall-brick", "supercategory": "wall"}, + {"id": 172, "name": "wall-concrete", "supercategory": "wall"}, + {"id": 173, "name": "wall-other", "supercategory": "wall"}, + {"id": 174, "name": "wall-panel", "supercategory": "wall"}, + {"id": 175, "name": "wall-stone", "supercategory": "wall"}, + {"id": 176, "name": "wall-tile", "supercategory": "wall"}, + {"id": 177, "name": "wall-wood", "supercategory": "wall"}, + {"id": 178, "name": "water-other", "supercategory": "water"}, + {"id": 179, "name": "waterdrops", "supercategory": "water"}, + {"id": 180, "name": "window-blind", "supercategory": "window"}, + {"id": 181, "name": "window-other", "supercategory": "window"}, + {"id": 182, "name": "wood", "supercategory": "solid"}, +] + + +def get_coco_categories_with_prompt_eng(): + COCO_CATEGORIES_ = copy.deepcopy(COCO_CATEGORIES) + coco_id_names = open('./mask_adapter/data/datasets/coco_panoptic_with_prompt_eng.txt').read().splitlines() + coco_idx = 0 + for line in coco_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + #print(COCO_CATEGORIES_[coco_idx]["name"], '->', name) + assert COCO_CATEGORIES_[coco_idx]["id"] == idx + COCO_CATEGORIES_[coco_idx]["name"] = name + coco_idx += 1 + return COCO_CATEGORIES_ + + +def get_coco_stuff_categories_with_prompt_eng(): + COCO_STUFF_CATEGORIES_ = copy.deepcopy(COCO_STUFF_CATEGORIES) + coco_id_names = open('./mask_adapter/data/datasets/coco_stuff_with_prompt_eng.txt').read().splitlines() + coco_idx = 0 + for line in coco_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + #print(COCO_STUFF_CATEGORIES_[coco_idx]["name"], '->', name) + assert COCO_STUFF_CATEGORIES_[coco_idx]["id"] == idx + COCO_STUFF_CATEGORIES_[coco_idx]["name"] = name + coco_idx += 1 + return COCO_STUFF_CATEGORIES_ + + +def get_ade20k_categories_with_prompt_eng(): + ADE20K_150_CATEGORIES_ = copy.deepcopy(ADE20K_150_CATEGORIES) + ade20k_id_names = open('./mask_adapter/data/datasets/ade20k_150_with_prompt_eng.txt').read().splitlines() + ade_idx = 0 + for line in ade20k_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + #print(ADE20K_150_CATEGORIES_[ade_idx]["name"], '->', name) + assert ADE20K_150_CATEGORIES_[ade_idx]["id"] == idx - 1 + ADE20K_150_CATEGORIES_[ade_idx]["name"] = name + ade_idx += 1 + return ADE20K_150_CATEGORIES_ + + +def get_cityscapes_categories_with_prompt_eng(): + CITYSCAPES_CATEGORIES_ = copy.deepcopy(CITYSCAPES_CATEGORIES) + cityscapes_id_names = open('./mask_adapter/data/datasets/cityscapes_with_prompt_eng.txt').read().splitlines() + cityscapes_idx = 0 + for line in cityscapes_id_names: + idx, name = line.split(':') + 
idx = int(idx) + if name == "invalid_class_id": + continue + #print(CITYSCAPES_CATEGORIES_[cityscapes_idx]["name"], '->', name) + assert CITYSCAPES_CATEGORIES_[cityscapes_idx]["trainId"] == idx + CITYSCAPES_CATEGORIES_[cityscapes_idx]["name"] = name + cityscapes_idx += 1 + return CITYSCAPES_CATEGORIES_ + +def get_ade20k_847_categories_with_prompt_eng(): + ADE20K_847_CATEGORIES_ = copy.deepcopy(ADE20K_847_CATEGORIES) + ade20k_847_id_names = open('./mask_adapter/data/datasets/ade20k_847_with_prompt_eng.txt').read().splitlines() + ade_idx = 0 + for line in ade20k_847_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + #print(ADE20K_847_CATEGORIES_[ade_idx]["name"], '->', name) + # assert ADE20K_847_CATEGORIES_[ade_idx]["id"] == idx - 1 + ADE20K_847_CATEGORIES_[ade_idx]["name"] = name + ade_idx += 1 + return ADE20K_847_CATEGORIES_ + +def get_pascal_21_categories_with_prompt_eng(): + PASCAL_VOC_21_CATEGORIES_ = copy.deepcopy(PASCAL_VOC_21_CATEGORIES) + pascal_21_id_names = open('./mask_adapter/data/datasets/pascal_voc_21_with_prompt_eng.txt').read().splitlines() + pas_idx = 0 + for line in pascal_21_id_names: + idx, name = line.split(':') + idx = int(idx) + PASCAL_VOC_21_CATEGORIES_[pas_idx]["name"] = name + pas_idx += 1 + return PASCAL_VOC_21_CATEGORIES_ + +def get_pascal_ctx_459_categories_with_prompt_eng(): + PASCAL_CTX_459_CATEGORIES_ = copy.deepcopy(PASCAL_CTX_459_CATEGORIES) + pascal_ctx_459_id_names = open('./mask_adapter/data/datasets/pascal_ctx_459_with_prompt_eng.txt').read().splitlines() + pas_idx = 0 + for line in pascal_ctx_459_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + PASCAL_CTX_459_CATEGORIES_[pas_idx]["name"] = name + pas_idx += 1 + return PASCAL_CTX_459_CATEGORIES_ + +def get_pascal_ctx_59_categories_with_prompt_eng(): + PASCAL_CTX_59_CATEGORIES_ = copy.deepcopy(PASCAL_CTX_59_CATEGORIES) + pascal_ctx_59_id_names = open('./mask_adapter/data/datasets/pascal_ctx_59_with_prompt_eng.txt').read().splitlines() + pas_idx = 0 + for line in pascal_ctx_59_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + PASCAL_CTX_59_CATEGORIES_[pas_idx]["name"] = name + pas_idx += 1 + return PASCAL_CTX_59_CATEGORIES_ + +def get_mapillary_vistas_categories_with_prompt_eng(): + MAPILLARY_VISTAS_SEM_SEG_CATEGORIES_ = copy.deepcopy(MAPILLARY_VISTAS_SEM_SEG_CATEGORIES) + mapillary_vistas_id_names = open('./mask_adapter/data/datasets/mapillary_vistas_with_prompt_eng.txt').read().splitlines() + mapillary_idx = 0 + for line in mapillary_vistas_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + MAPILLARY_VISTAS_SEM_SEG_CATEGORIES_[mapillary_idx]["name"] = name + mapillary_idx += 1 + return MAPILLARY_VISTAS_SEM_SEG_CATEGORIES_ + +def get_grand_categories_with_prompt_eng(): + GRAND_CATEGORIES_ = [] + grand_id_names = open('./mask_adapter/data/datasets/grand_with_prompt_eng.txt').read().splitlines() + for line in grand_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + GRAND_CATEGORIES_.append({"id": idx, "name": name}) + return GRAND_CATEGORIES_ + +if __name__ == "__main__": + get_coco_categories_with_prompt_eng() + get_ade20k_categories_with_prompt_eng() + get_cityscapes_categories_with_prompt_eng() + get_ade20k_847_categories_with_prompt_eng() + get_pascal_21_categories_with_prompt_eng() + 
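+    # Running this module directly acts as a smoke test: every getter call here re-parses its
+    # *_with_prompt_eng.txt file, and several of them assert that the listed ids still line up
+    # with the hard-coded category definitions in this file.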
get_pascal_ctx_459_categories_with_prompt_eng() + get_pascal_ctx_59_categories_with_prompt_eng() + get_mapillary_vistas_categories_with_prompt_eng() + get_coco_stuff_categories_with_prompt_eng() + get_grand_categories_with_prompt_eng() \ No newline at end of file diff --git a/mask_adapter/data/datasets/pascal_ctx_459_with_prompt_eng.txt b/mask_adapter/data/datasets/pascal_ctx_459_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..c281c47a5ffa24dde42e6151e982ac03e0835138 --- /dev/null +++ b/mask_adapter/data/datasets/pascal_ctx_459_with_prompt_eng.txt @@ -0,0 +1,460 @@ +0:invalid_class_id +1:accordion +2:aeroplane,aeroplanes,airplanes,airplane +3:air conditioner +4:antenna +5:artillery +6:ashtray +7:atrium +8:baby carriage +9:bag,bags +10:ball +11:balloon +12:bamboo weaving +13:barrel +14:baseball bat +15:basket +16:basketball backboard +17:bathtub +18:bed,beds +19:bedclothes +20:beer +21:bell +22:bench,benches +23:bicycle,bicycles +24:binoculars +25:bird,birds +26:bird cage +27:bird feeder +28:bird nest +29:blackboard +30:board +31:boat,boats +32:bone +33:book,books +34:bottle,bottles,water bottle +35:bottle opener +36:bowl +37:box +38:bracelet +39:brick +40:bridge +41:broom +42:brush +43:bucket +44:building,buildings +45:bus,buses +46:cabinet,cabinets,drawer,drawers +47:cabinet door +48:cage +49:cake +50:calculator +51:calendar +52:camel +53:camera +54:camera lens +55:can +56:candle +57:candle holder +58:cap +59:car,cars +60:card +61:cart +62:computer case +63:casette recorder +64:cash register +65:cat,cats,kitties,kitty +66:cd +67:cd player +68:ceiling +69:cell phone +70:cello +71:chain +72:chair,chairs +73:chessboard +74:chicken +75:chopstick +76:clip +77:clippers +78:clock +79:closet +80:cloth,clothes +81:clothes tree +82:coffee +83:coffee machine +84:comb +85:computer +86:concrete +87:cone +88:container +89:control booth +90:controller +91:cooker +92:copying machine +93:coral +94:cork +95:corkscrew +96:counter +97:court +98:cow,cows +99:crabstick +100:crane +101:crate +102:cross +103:crutch +104:cup,cups +105:curtain,curtains +106:cushion +107:cutting board +108:dais +109:disc +110:disc case +111:dishwasher +112:dock +113:dog,dogs,puppy,puppies +114:dolphin +115:door,doors +116:drainer +117:dray +118:drink dispenser +119:drinking machine +120:drop +121:drug +122:drum +123:drum kit +124:duck +125:dumbbell +126:earphone +127:earrings +128:egg +129:electric fan +130:electric iron +131:electric pot +132:electric saw +133:electronic keyboard +134:engine +135:envelope +136:equipment +137:escalator +138:exhibition booth +139:extinguisher +140:eyeglass +141:fan +142:faucet +143:fax machine +144:fence,fences +145:ferris wheel +146:fire extinguisher +147:fire hydrant +148:fire place +149:fish +150:fish tank +151:fishbowl +152:fishing net +153:fishing pole +154:flag +155:flagstaff +156:flame +157:flashlight +158:floor,tile ground,carpet,rug,flooring +159:flower,flowers +160:fly +161:foam +162:food +163:footbridge +164:forceps +165:fork +166:forklift +167:fountain +168:fox +169:frame +170:fridge +171:frog +172:fruit +173:funnel +174:furnace +175:game controller +176:game machine +177:gas cylinder +178:gas hood +179:gas stove +180:gift box +181:glass +182:glass marble +183:globe +184:glove +185:goal +186:grandstand +187:grass,grasses,lawn,turf +188:gravestone +189:ground,soil,soil ground,dirt ground +190:guardrail +191:guitar +192:gun +193:hammer +194:hand cart +195:handle +196:handrail +197:hanger +198:hard disk drive +199:hat +200:hay +201:headphone 
+202:heater +203:helicopter +204:helmet +205:holder +206:hook +207:horse,horses,foal +208:horse-drawn carriage +209:hot-air balloon +210:hydrovalve +211:ice +212:inflator pump +213:ipod +214:iron +215:ironing board +216:jar +217:kart +218:kettle +219:key +220:keyboard,keyboards +221:kitchen range +222:kite +223:knife +224:knife block +225:ladder +226:ladder truck +227:ladle +228:laptop +229:leaves +230:lid +231:life buoy +232:lamp,lamps +233:light bulb +234:lighter +235:line +236:lion +237:lobster +238:lock +239:machine +240:mailbox +241:mannequin +242:map +243:mask +244:mat +245:match book +246:mattress +247:menu +248:metal +249:meter box +250:microphone +251:microwave +252:mirror +253:missile +254:model +255:money +256:monkey +257:mop +258:motorbike,motorcycle,motorbikes,motorcycles +259:mountain,mountains +260:mouse +261:mouse pad +262:musical instrument +263:napkin +264:net +265:newspaper +266:oar +267:ornament +268:outlet +269:oven +270:oxygen bottle +271:pack +272:pan +273:paper +274:paper box +275:paper cutter +276:parachute +277:parasol +278:parterre +279:patio +280:pelage +281:pen +282:pen container +283:pencil +284:person,child,girl,boy,woman,man,people,childeren,girls,boys,women,men,lady,guy,ladies,guys +285:photo +286:piano +287:picture +288:pig +289:pillar +290:pillow +291:pipe +292:pitcher +293:plant +294:plastic +295:plate,plates +296:platform,platforms +297:player +298:playground +299:pliers +300:plume +301:poker +302:poker chip +303:pole +304:pool table +305:postcard +306:poster +307:pot +308:pottedplant,pottedplants,plant pot,plant pots,planter,planters +309:printer +310:projector +311:pumpkin +312:rabbit +313:racket +314:radiator +315:radio +316:rail +317:rake +318:ramp +319:range hood +320:receiver +321:recorder +322:recreational machines +323:remote control +324:road,street,streets +325:robot +326:rock,rocks,stone,stones +327:rocket +328:rocking horse +329:rope +330:rug +331:ruler +332:runway +333:saddle +334:sand +335:saw +336:scale +337:scanner +338:scissors +339:scoop +340:screen +341:screwdriver +342:sculpture +343:scythe +344:sewer +345:sewing machine +346:shed +347:sheep +348:shell +349:shelves,shelf +350:shoe +351:shopping cart +352:shovel +353:sidecar +354:sidewalk +355:sign,signs +356:signal light +357:sink +358:skateboard +359:ski +360:sky,clouds +361:sled +362:slippers +363:smoke +364:snail +365:snake +366:snow +367:snowmobiles +368:sofa +369:spanner +370:spatula +371:speaker +372:speed bump +373:spice container +374:spoon +375:sprayer +376:squirrel +377:stage +378:stair +379:stapler +380:stick +381:sticky note +382:stone +383:stool +384:stove +385:straw +386:stretcher +387:sun +388:sunglass +389:sunshade +390:surveillance camera +391:swan +392:sweeper +393:swim ring +394:swimming pool +395:swing +396:switch +397:table,diningtable,diningtables,tables,desk,desks,side table,side tables,coffee table +398:tableware +399:tank +400:tap +401:tape +402:tarp +403:telephone +404:telephone booth +405:tent +406:tire +407:toaster +408:toilet +409:tong +410:tool +411:toothbrush +412:towel +413:toy +414:toy car +415:track,train track,railroad +416:train,trains,locomotive,locomotives,freight train +417:trampoline +418:trash bin +419:tray +420:tree,trees +421:tricycle +422:tripod +423:trophy +424:truck,trucks +425:tube +426:turtle +427:tvmonitor,monitor,tv +428:tweezers +429:typewriter +430:umbrella +431:unknown +432:vacuum cleaner +433:vending machine +434:video camera +435:video game console +436:video player +437:video tape +438:violin +439:wakeboard +440:wall,walls 
+441:wallet +442:wardrobe +443:washing machine +444:watch +445:water +446:water dispenser +447:water pipe +448:water skate board +449:watermelon +450:whale +451:wharf +452:wheel +453:wheelchair +454:window,windows +455:window blinds +456:wineglass +457:wire +458:wood piece +459:wool \ No newline at end of file diff --git a/mask_adapter/data/datasets/pascal_ctx_59_with_prompt_eng.txt b/mask_adapter/data/datasets/pascal_ctx_59_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..5bd5242d89b7bf12043cc051fa0d8f72e99110fe --- /dev/null +++ b/mask_adapter/data/datasets/pascal_ctx_59_with_prompt_eng.txt @@ -0,0 +1,60 @@ +0:invalid_class_id +1:aeroplane,aeroplanes,airplanes,airplane +2:bag,bags +3:bed,beds +4:bedclothes +5:bench,benches +6:bicycle,bicycles +7:bird,birds +8:boat,boats +9:book,books +10:bottle,bottles,water bottle +11:building,buildings +12:bus,buses +13:cabinet,cabinets,drawer,drawers +14:car,cars +15:cat,cats,kitties,kitty +16:ceiling +17:chair,chairs +18:cloth,clothes +19:computer case +20:cow,cows +21:cup,cups +22:curtain,curtains +23:dog,dogs,puppy,puppies +24:door,doors +25:fence,fences +26:floor,tile ground,carpet,rug,flooring +27:flower,flowers +28:food +29:grass,grasses,lawn,turf +30:ground,soil,soil ground,dirt ground +31:horse,horses,foal +32:keyboard,keyboards +33:lamp,lamps,bulb,bulbs +34:motorbike,motorcycle,motorbikes,motorcycles +35:mountain,mountains +36:mouse +37:person,child,girl,boy,woman,man,people,childeren,girls,boys,women,men,lady,guy,ladies,guys +38:plate,plates +39:platform,platforms +40:pottedplant,pottedplants,plant pot,plant pots,planter,planters +41:street,streets +42:rock,rocks,stone,stones +43:sheep +44:shelves,shelf +45:sidewalk +46:sign,signs +47:sky,clouds +48:snow +49:sofa +50:diningtable,diningtables,table,tables,desk,desks,side table,side tables,coffee table +51:track,train track,railroad +52:train,trains,locomotive,locomotives,freight train +53:tree,trees +54:truck,trucks +55:tvmonitor,monitor,tv +56:wall,walls +57:water +58:window,windows +59:wood piece \ No newline at end of file diff --git a/mask_adapter/data/datasets/pascal_voc_21_with_prompt_eng.txt b/mask_adapter/data/datasets/pascal_voc_21_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..08f525d73afec5323496a6da35825dfd24032101 --- /dev/null +++ b/mask_adapter/data/datasets/pascal_voc_21_with_prompt_eng.txt @@ -0,0 +1,21 @@ +0:background,crops,bush,shrub,tiles,pavement,rug,carpet,box,boxes,speaker,storage,painting,board,panel,poster,clock,cage,drinking glass,park,plaything,toy,fireplace,bag,bag,bed,bench,book,books,building,buildings,cabinet,drawer,ceiling,computer,computer case,cup,cups,door,fence,floor,flower,grass,lawn,turf,ground,soil,dirt,tiles,keyboard,lamp,mountain,hills,mouse,curtain,platform,sign,street,rock,stone,shelf,sidewalk,sky,clouds,snow,track,train track,tree,trees,wall,water,window,wood,woods +1:aeroplane,airplane,aeroplanes,airplanes +2:bicycle,bicycles,bike,bikes +3:bird,birds +4:boat,boats +5:bottle,bottles,water bottle +6:bus,buses +7:car,cars +8:cat,cats,kitties,kitty +9:chair,chairs +10:cow,cows,calf +11:diningtable,dining table,diningtables,dining tables,plate,plates +12:dog,dogs,puppy,puppies +13:horse,horses,foal +14:motorbike,motorcycle,motorbikes,motorcycles +15:person,child,girl,boy,woman,man,people,childeren,girls,boys,women,men,lady,guy,ladies,guys,clothes +16:pottedplant,pottedplants,plant pot,plant pots,planter,planters +17:sheep +18:sofa,sofas 
+19:train,trains,locomotive,locomotives,freight train +20:tvmonitor,monitor,tv \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_ade20k_full.py b/mask_adapter/data/datasets/register_ade20k_full.py new file mode 100644 index 0000000000000000000000000000000000000000..7f4154e6f81081da9e04e9a90c243194011591d2 --- /dev/null +++ b/mask_adapter/data/datasets/register_ade20k_full.py @@ -0,0 +1,62 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_ade20k_full.py +""" + +import os + +import numpy as np + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg + +from . import openseg_classes + +ADE20K_847_CATEGORIES = openseg_classes.get_ade20k_847_categories_with_prompt_eng() + +ADE20k_847_COLORS = [np.random.randint(256, size=3).tolist() for k in ADE20K_847_CATEGORIES] + +MetadataCatalog.get("openvocab_ade20k_full_sem_seg_train").set( + stuff_colors=ADE20k_847_COLORS[:], +) + +MetadataCatalog.get("openvocab_ade20k_full_sem_seg_val").set( + stuff_colors=ADE20k_847_COLORS[:], +) + + +def _get_ade20k_847_meta(): + # We only need class names + stuff_classes = [k["name"] for k in ADE20K_847_CATEGORIES] + assert len(stuff_classes) == 847, len(stuff_classes) + + ret = { + "stuff_classes": stuff_classes, + } + return ret + + +def register_all_ade20k_847(root): + root = os.path.join(root, "ADE20K_2021_17_01") + meta = _get_ade20k_847_meta() + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images_detectron2", dirname) + gt_dir = os.path.join(root, "annotations_detectron2", dirname) + name = f"openvocab_ade20k_full_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="tif", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=meta["stuff_classes"][:], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=65535, # NOTE: gt is saved in 16-bit TIFF images + gt_ext="tif", + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_ade20k_847(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_ade20k_instance.py b/mask_adapter/data/datasets/register_ade20k_instance.py new file mode 100644 index 0000000000000000000000000000000000000000..a158b19659c275083e102babbbf692610d3a6564 --- /dev/null +++ b/mask_adapter/data/datasets/register_ade20k_instance.py @@ -0,0 +1,61 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_ade20k_instance.py +""" + +import json +import logging +import numpy as np +import os +from PIL import Image + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets.coco import load_coco_json, register_coco_instances +from detectron2.utils.file_io import PathManager + +from . 
import openseg_classes +import copy +ADE_CATEGORIES = copy.deepcopy(openseg_classes.ADE20K_150_CATEGORIES) +ADE_CATEGORIES = [x for x in ADE_CATEGORIES if x["isthing"] == 1] + +_PREDEFINED_SPLITS = { + # point annotations without masks + "openvocab_ade20k_instance_train": ( + "ADEChallengeData2016/images/training", + "ADEChallengeData2016/ade20k_instance_train.json", + ), + "openvocab_ade20k_instance_val": ( + "ADEChallengeData2016/images/validation", + "ADEChallengeData2016/ade20k_instance_val.json", + ), +} + + +def _get_ade_instances_meta(): + thing_ids = [k["id"] for k in ADE_CATEGORIES] + assert len(thing_ids) == 100, len(thing_ids) + # Mapping from the incontiguous ADE category id to an id in [0, 99] + thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} + thing_classes = [k["name"] for k in ADE_CATEGORIES] + ret = { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes, + } + return ret + + +def register_all_ade20k_instance(root): + for key, (image_root, json_file) in _PREDEFINED_SPLITS.items(): + # Assume pre-defined datasets live in `./datasets`. + register_coco_instances( + key, + _get_ade_instances_meta(), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_ade20k_instance(_root) diff --git a/mask_adapter/data/datasets/register_ade20k_panoptic.py b/mask_adapter/data/datasets/register_ade20k_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..f84b9006867517a8a3e8074f4d04b5356c47f0aa --- /dev/null +++ b/mask_adapter/data/datasets/register_ade20k_panoptic.py @@ -0,0 +1,222 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_ade20k_panoptic.py +""" + +import json +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.utils.file_io import PathManager +from detectron2.data.datasets.coco import load_sem_seg + + +from . import openseg_classes + +ADE20K_150_CATEGORIES = openseg_classes.get_ade20k_categories_with_prompt_eng() + +ADE20k_COLORS = [k["color"] for k in ADE20K_150_CATEGORIES] + +MetadataCatalog.get("openvocab_ade20k_sem_seg_train").set( + stuff_colors=ADE20k_COLORS[:], +) + +MetadataCatalog.get("openvocab_ade20k_sem_seg_val").set( + stuff_colors=ADE20k_COLORS[:], +) + + +def load_ade20k_panoptic_json(json_file, image_dir, gt_dir, semseg_dir, meta,panoptic_name): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/coco/train2017". + gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017". + json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json". + Returns: + list[dict]: a list of dicts in Detectron2 standard format. 
(See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = True + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = False + return segment_info + + with PathManager.open(json_file) as f: + json_info = json.load(f) + + ret = [] + for ann in json_info["annotations"]: + image_id = ann["image_id"] + # TODO: currently we assume image and label has the same filename but + # different extension, and images have extension ".jpg" for COCO. Need + # to make image extension a user-provided argument if we extend this + # function to support other COCO-like datasets. + image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg") + label_file = os.path.join(gt_dir, ann["file_name"]) + sem_label_file = os.path.join(semseg_dir, ann["file_name"]) + segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]] + ret.append( + { + "file_name": image_file, + "image_id": image_id, + "pan_seg_file_name": label_file, + "sem_seg_file_name": sem_label_file, + "segments_info": segments_info, + "dataname": panoptic_name, + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"] + assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"] + assert PathManager.isfile(ret[0]["sem_seg_file_name"]), ret[0]["sem_seg_file_name"] + return ret + + +def register_ade20k_panoptic( + name, metadata, image_root, panoptic_root, semantic_root, panoptic_json, instances_json=None +): + """ + Register a "standard" version of ADE20k panoptic segmentation dataset named `name`. + The dictionaries in this registered dataset follows detectron2's standard format. + Hence it's called "standard". + Args: + name (str): the name that identifies a dataset, + e.g. "ade20k_panoptic_train" + metadata (dict): extra metadata associated with this dataset. + image_root (str): directory which contains all the images + panoptic_root (str): directory which contains panoptic annotation images in COCO format + panoptic_json (str): path to the json panoptic annotation file in COCO format + sem_seg_root (none): not used, to be consistent with + `register_coco_panoptic_separated`. 
+ instances_json (str): path to the json instance annotation file + """ + panoptic_name = name + DatasetCatalog.register( + panoptic_name, + lambda: load_ade20k_panoptic_json( + panoptic_json, image_root, panoptic_root, semantic_root, metadata, panoptic_name + ), + ) + MetadataCatalog.get(panoptic_name).set( + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + json_file=instances_json, + evaluator_type="ade20k_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **metadata, + ) + + +_PREDEFINED_SPLITS_ADE20K_PANOPTIC = { + "openvocab_ade20k_panoptic_train": ( + "ADEChallengeData2016/images/training", + "ADEChallengeData2016/ade20k_panoptic_train", + "ADEChallengeData2016/ade20k_panoptic_train.json", + "ADEChallengeData2016/annotations_detectron2/training", + "ADEChallengeData2016/ade20k_instance_train.json", + ), + "openvocab_ade20k_panoptic_val": ( + "ADEChallengeData2016/images/validation", + "ADEChallengeData2016/ade20k_panoptic_val", + "ADEChallengeData2016/ade20k_panoptic_val.json", + "ADEChallengeData2016/annotations_detectron2/validation", + "ADEChallengeData2016/ade20k_instance_val.json", + ), +} + + +def get_metadata(): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. + thing_classes = [k["name"] for k in ADE20K_150_CATEGORIES if k["isthing"] == 1] + thing_colors = [k["color"] for k in ADE20K_150_CATEGORIES if k["isthing"] == 1] + stuff_classes = [k["name"] for k in ADE20K_150_CATEGORIES] + stuff_colors = [k["color"] for k in ADE20K_150_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # Convert category id for training: + # category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the linear + # softmax classifier. + thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for i, cat in enumerate(ADE20K_150_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + # else: + # stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + # in order to use sem_seg evaluator + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + return meta + + +def register_all_ade20k_panoptic(root): + metadata = get_metadata() + for ( + prefix, + (image_root, panoptic_root, panoptic_json, semantic_root, instance_json), + ) in _PREDEFINED_SPLITS_ADE20K_PANOPTIC.items(): + # The "standard" version of COCO panoptic segmentation dataset, + # e.g. 
used by Panoptic-DeepLab + register_ade20k_panoptic( + prefix, + metadata, + os.path.join(root, image_root), + os.path.join(root, panoptic_root), + os.path.join(root, semantic_root), + os.path.join(root, panoptic_json), + os.path.join(root, instance_json), + ) + +def register_all_ade20k_semantic(root): + root = os.path.join(root, "ADEChallengeData2016") + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_detectron2", dirname) + name = f"openvocab_ade20k_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=[x["name"] for x in ADE20K_150_CATEGORIES], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_ade20k_panoptic(_root) +register_all_ade20k_semantic(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_cityscapes_panoptic.py b/mask_adapter/data/datasets/register_cityscapes_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..4f0dd97b57768d72dc79d41de5621e3c62dfd707 --- /dev/null +++ b/mask_adapter/data/datasets/register_cityscapes_panoptic.py @@ -0,0 +1,200 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/detectron2/blob/main/detectron2/data/datasets/cityscapes_panoptic.py +""" + +import json +import logging +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.utils.file_io import PathManager + +from . import openseg_classes + +CITYSCAPES_CATEGORIES = openseg_classes.get_cityscapes_categories_with_prompt_eng() + +""" +This file contains functions to register the Cityscapes panoptic dataset to the DatasetCatalog. +""" + + +logger = logging.getLogger(__name__) + + +def get_cityscapes_panoptic_files(image_dir, gt_dir, json_info): + files = [] + # scan through the directory + cities = PathManager.ls(image_dir) + logger.info(f"{len(cities)} cities found in '{image_dir}'.") + image_dict = {} + for city in cities: + city_img_dir = os.path.join(image_dir, city) + for basename in PathManager.ls(city_img_dir): + image_file = os.path.join(city_img_dir, basename) + + suffix = "_leftImg8bit.png" + assert basename.endswith(suffix), basename + basename = os.path.basename(basename)[: -len(suffix)] + + image_dict[basename] = image_file + + for ann in json_info["annotations"]: + image_file = image_dict.get(ann["image_id"], None) + assert image_file is not None, "No image {} found for annotation {}".format( + ann["image_id"], ann["file_name"] + ) + label_file = os.path.join(gt_dir, ann["file_name"]) + segments_info = ann["segments_info"] + + files.append((image_file, label_file, segments_info)) + + assert len(files), "No images found in {}".format(image_dir) + assert PathManager.isfile(files[0][0]), files[0][0] + assert PathManager.isfile(files[0][1]), files[0][1] + return files + + +def load_cityscapes_panoptic(image_dir, gt_dir, gt_json, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., + "~/cityscapes/gtFine/cityscapes_panoptic_train". 
+ gt_json (str): path to the json file. e.g., + "~/cityscapes/gtFine/cityscapes_panoptic_train.json". + meta (dict): dictionary containing "thing_dataset_id_to_contiguous_id" + and "stuff_dataset_id_to_contiguous_id" to map category ids to + contiguous ids for training. + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + return segment_info + + assert os.path.exists( + gt_json + ), "Please run `python cityscapesscripts/preparation/createPanopticImgs.py` to generate label files." # noqa + with open(gt_json) as f: + json_info = json.load(f) + files = get_cityscapes_panoptic_files(image_dir, gt_dir, json_info) + ret = [] + for image_file, label_file, segments_info in files: + sem_label_file = ( + image_file.replace("leftImg8bit", "gtFine").split(".")[0] + "_labelTrainIds.png" + ) + segments_info = [_convert_category_id(x, meta) for x in segments_info] + ret.append( + { + "file_name": image_file, + "image_id": "_".join( + os.path.splitext(os.path.basename(image_file))[0].split("_")[:3] + ), + "sem_seg_file_name": sem_label_file, + "pan_seg_file_name": label_file, + "segments_info": segments_info, + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile( + ret[0]["sem_seg_file_name"] + ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa + assert PathManager.isfile( + ret[0]["pan_seg_file_name"] + ), "Please generate panoptic annotation with python cityscapesscripts/preparation/createPanopticImgs.py" # noqa + return ret + + +# rename to avoid conflict +_RAW_CITYSCAPES_PANOPTIC_SPLITS = { + "openvocab_cityscapes_fine_panoptic_train": ( + "cityscapes/leftImg8bit/train", + "cityscapes/gtFine/cityscapes_panoptic_train", + "cityscapes/gtFine/cityscapes_panoptic_train.json", + ), + "openvocab_cityscapes_fine_panoptic_val": ( + "cityscapes/leftImg8bit/val", + "cityscapes/gtFine/cityscapes_panoptic_val", + "cityscapes/gtFine/cityscapes_panoptic_val.json", + ), + # "cityscapes_fine_panoptic_test": not supported yet +} + + +def register_all_cityscapes_panoptic(root): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. + thing_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] + thing_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] + stuff_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] + stuff_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # There are three types of ids in cityscapes panoptic segmentation: + # (1) category id: like semantic segmentation, it is the class id for each + # pixel. 
Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the classifier + # (2) instance id: this id is used to differentiate different instances from + # the same category. For "stuff" classes, the instance id is always 0; for + # "thing" classes, the instance id starts from 1 and 0 is reserved for + # ignored instances (e.g. crowd annotation). + # (3) panoptic id: this is the compact id that encode both category and + # instance id by: category_id * 1000 + instance_id. + thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for k in CITYSCAPES_CATEGORIES: + if k["isthing"] == 1: + thing_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] + else: + stuff_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + for key, (image_dir, gt_dir, gt_json) in _RAW_CITYSCAPES_PANOPTIC_SPLITS.items(): + image_dir = os.path.join(root, image_dir) + gt_dir = os.path.join(root, gt_dir) + gt_json = os.path.join(root, gt_json) + + DatasetCatalog.register( + key, lambda x=image_dir, y=gt_dir, z=gt_json: load_cityscapes_panoptic(x, y, z, meta) + ) + MetadataCatalog.get(key).set( + panoptic_root=gt_dir, + image_root=image_dir, + panoptic_json=gt_json, + gt_dir=gt_dir.replace("cityscapes_panoptic_", ""), + evaluator_type="cityscapes_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **meta, + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_cityscapes_panoptic(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_coco_instance.py b/mask_adapter/data/datasets/register_coco_instance.py new file mode 100644 index 0000000000000000000000000000000000000000..c1b7f3862943b8c6ebe76520cd2532e8f510ba41 --- /dev/null +++ b/mask_adapter/data/datasets/register_coco_instance.py @@ -0,0 +1,61 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/detectron2/blob/67ac149947124670f6678e1bdd75f89dbf0dd5e7/detectron2/data/datasets/coco.py +""" + +import json +import logging +import numpy as np +import os +from PIL import Image + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets.coco import load_coco_json, register_coco_instances +from detectron2.utils.file_io import PathManager + +from . 
import openseg_classes +import copy +COCO_CATEGORIES = openseg_classes.get_coco_categories_with_prompt_eng() +COCO_CATEGORIES = [x for x in COCO_CATEGORIES if x["isthing"] == 1] + +_PREDEFINED_SPLITS = { + # point annotations without masks + "openvocab_coco_2017_train": ( + "coco/train2017", + "coco/annotations/instances_train2017.json", + ), + "openvocab_coco_2017_val": ( + "coco/val2017", + "coco/annotations/instances_val2017.json", + ), +} + + +def _get_coco_instances_meta(): + thing_ids = [k["id"] for k in COCO_CATEGORIES] + assert len(thing_ids) == 80, len(thing_ids) + # Mapping from the incontiguous ADE category id to an id in [0, 99] + thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} + thing_classes = [k["name"] for k in COCO_CATEGORIES] + ret = { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes, + } + return ret + + +def register_all_coco_instance(root): + for key, (image_root, json_file) in _PREDEFINED_SPLITS.items(): + # Assume pre-defined datasets live in `./datasets`. + register_coco_instances( + key, + _get_coco_instances_meta(), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_coco_instance(_root) diff --git a/mask_adapter/data/datasets/register_coco_panoptic_annos_semseg.py b/mask_adapter/data/datasets/register_coco_panoptic_annos_semseg.py new file mode 100644 index 0000000000000000000000000000000000000000..6cc343b9316e5d095a0ab34af75693be1aec7895 --- /dev/null +++ b/mask_adapter/data/datasets/register_coco_panoptic_annos_semseg.py @@ -0,0 +1,196 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_coco_panoptic_annos_semseg.py +""" + +import json +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg +# from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES +from . import openseg_classes + +from detectron2.utils.file_io import PathManager + + +COCO_CATEGORIES = openseg_classes.get_coco_categories_with_prompt_eng() + +_PREDEFINED_SPLITS_COCO_PANOPTIC = { + "openvocab_coco_2017_train_panoptic": ( + # This is the original panoptic annotation directory + "coco/panoptic_train2017", + "coco/annotations/panoptic_train2017.json", + # This directory contains semantic annotations that are + # converted from panoptic annotations. + # It is used by PanopticFPN. + # You can use the script at detectron2/datasets/prepare_panoptic_fpn.py + # to create these directories. + "coco/panoptic_semseg_train2017", + ), + "openvocab_coco_2017_val_panoptic": ( + "coco/panoptic_val2017", + "coco/annotations/panoptic_val2017.json", + "coco/panoptic_semseg_val2017", + ), +} + + +def get_metadata(): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. 
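The get_metadata() helper that this comment introduces builds two id maps: only "thing" categories receive an entry in thing_dataset_id_to_contiguous_id, while every category (thing or stuff) receives an entry in stuff_dataset_id_to_contiguous_id so the sem_seg evaluator can score all classes. A minimal, self-contained sketch of that behaviour with toy categories (not the real COCO list):

# Illustrative sketch only; toy categories stand in for COCO_CATEGORIES.
toy_categories = [
    {"id": 1,   "name": "person", "isthing": 1},
    {"id": 92,  "name": "banner", "isthing": 0},
    {"id": 184, "name": "tree",   "isthing": 0},
]
thing_map, stuff_map = {}, {}
for i, cat in enumerate(toy_categories):
    if cat["isthing"]:
        thing_map[cat["id"]] = i       # things only
    stuff_map[cat["id"]] = i           # every category, so the sem_seg evaluator sees all classes
assert thing_map == {1: 0}
assert stuff_map == {1: 0, 92: 1, 184: 2}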
+ thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1] + thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1] + stuff_classes = [k["name"] for k in COCO_CATEGORIES] + stuff_colors = [k["color"] for k in COCO_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # Convert category id for training: + # category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the linear + # softmax classifier. + thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + contiguous_id_to_class_name = [] + + for i, cat in enumerate(COCO_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + # else: + # stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + # in order to use sem_seg evaluator + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + contiguous_id_to_class_name.append(cat["name"]) + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + meta["contiguous_id_to_class_name"] = contiguous_id_to_class_name + + return meta + + +def load_coco_panoptic_json(json_file, image_dir, gt_dir, semseg_dir, meta, semantic_name): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/coco/train2017". + gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017". + json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json". + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = True + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = False + return segment_info + + with PathManager.open(json_file) as f: + json_info = json.load(f) + + ret = [] + for ann in json_info["annotations"]: + image_id = int(ann["image_id"]) + # TODO: currently we assume image and label has the same filename but + # different extension, and images have extension ".jpg" for COCO. Need + # to make image extension a user-provided argument if we extend this + # function to support other COCO-like datasets. + image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg") + label_file = os.path.join(gt_dir, ann["file_name"]) + sem_label_file = os.path.join(semseg_dir, ann["file_name"]) + segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]] + ret.append( + { + "file_name": image_file, + "image_id": image_id, + "pan_seg_file_name": label_file, + "sem_seg_file_name": sem_label_file, + "segments_info": segments_info, + "dataname": semantic_name, + } + ) + assert len(ret), f"No images found in {image_dir}!" 
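For orientation, a hedged usage sketch (not part of this diff) of how the split backed by this loader can be inspected once the COCO panoptic files exist under $DETECTRON2_DATASETS; it relies only on detectron2's public catalogs and the dict keys assembled above:

# Usage sketch, assuming this module has been imported and the data is on disk.
from detectron2.data import DatasetCatalog, MetadataCatalog

name = "openvocab_coco_2017_train_panoptic_with_sem_seg"   # registered later in this file
records = DatasetCatalog.get(name)      # list[dict] built by load_coco_panoptic_json
meta = MetadataCatalog.get(name)

sample = records[0]
print(sample["file_name"])              # RGB image
print(sample["pan_seg_file_name"])      # COCO panoptic PNG
print(sample["sem_seg_file_name"])      # converted semantic PNG
print(sample["dataname"])               # equals `name`; extra key added by this loader
print(len(meta.stuff_classes), meta.ignore_label)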
+ assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"] + assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"] + assert PathManager.isfile(ret[0]["sem_seg_file_name"]), ret[0]["sem_seg_file_name"] + return ret + + +def register_coco_panoptic_annos_sem_seg( + name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json +): + panoptic_name = name + #delattr(MetadataCatalog.get(panoptic_name), "thing_classes") + #delattr(MetadataCatalog.get(panoptic_name), "thing_colors") + MetadataCatalog.get(panoptic_name).set( + thing_classes=metadata["thing_classes"], + thing_colors=metadata["thing_colors"], + # thing_dataset_id_to_contiguous_id=metadata["thing_dataset_id_to_contiguous_id"], + ) + + # the name is "coco_2017_train_panoptic_with_sem_seg" and "coco_2017_val_panoptic_with_sem_seg" + semantic_name = name + "_with_sem_seg" + DatasetCatalog.register( + semantic_name, + lambda: load_coco_panoptic_json(panoptic_json, image_root, panoptic_root, sem_seg_root, metadata, semantic_name), + ) + MetadataCatalog.get(semantic_name).set( + sem_seg_root=sem_seg_root, + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + json_file=instances_json, + evaluator_type="coco_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **metadata, + ) + + +def register_all_coco_panoptic_annos_sem_seg(root): + for ( + prefix, + (panoptic_root, panoptic_json, semantic_root), + ) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items(): + prefix_instances = prefix[: -len("_panoptic")].replace("openvocab_", "") + instances_meta = MetadataCatalog.get(prefix_instances) + image_root, instances_json = instances_meta.image_root, instances_meta.json_file + + register_coco_panoptic_annos_sem_seg( + prefix, + get_metadata(), + image_root, + os.path.join(root, panoptic_root), + os.path.join(root, panoptic_json), + os.path.join(root, semantic_root), + instances_json, + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_coco_panoptic_annos_sem_seg(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_coco_stuff_164k.py b/mask_adapter/data/datasets/register_coco_stuff_164k.py new file mode 100644 index 0000000000000000000000000000000000000000..4763fe2df2d705a81e82d432a4e448a3f7ddc3eb --- /dev/null +++ b/mask_adapter/data/datasets/register_coco_stuff_164k.py @@ -0,0 +1,63 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/MendelXu/SAN/blob/main/san/data/datasets/register_coco_stuff_164k.py +""" + +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg + +from . import openseg_classes + +COCO_CATEGORIES = openseg_classes.get_coco_stuff_categories_with_prompt_eng() + + +def _get_coco_stuff_meta(): + # Id 0 is reserved for ignore_label, we change ignore_label for 0 + # to 255 in our pre-processing. 
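One idiom that recurs in the registration loops of this and the following register_* files is binding the per-split paths through lambda default arguments (lambda x=image_dir, y=gt_dir: ...). Without the defaults, every closure would see the last loop iteration's paths, because Python closures capture variables rather than values. A minimal sketch of the difference:

# Default-argument binding vs. late binding (sketch only).
bound, late = [], []
for path in ["/data/train", "/data/val"]:
    bound.append(lambda x=path: x)   # captures this iteration's value
    late.append(lambda: path)        # all closures share the final `path`

assert [f() for f in bound] == ["/data/train", "/data/val"]
assert [f() for f in late] == ["/data/val", "/data/val"]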
+ stuff_ids = [k["id"] for k in COCO_CATEGORIES] + assert len(stuff_ids) == 171, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 91], used in models) to ids in the dataset (used for processing results) + stuff_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(stuff_ids)} + stuff_classes = [k["name"] for k in COCO_CATEGORIES] + + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + } + return ret + + +def register_all_coco_stuff_164k(root): + root = os.path.join(root, "coco") + meta = _get_coco_stuff_meta() + + for name, image_dirname, sem_seg_dirname in [ + ("train", "train2017", "stuffthingmaps_detectron2/train2017"), + ("test", "val2017", "stuffthingmaps_detectron2/val2017"), + ]: + image_dir = os.path.join(root, image_dirname) + gt_dir = os.path.join(root, sem_seg_dirname) + all_name = f"openvocab_coco_2017_{name}_stuff_sem_seg" + DatasetCatalog.register( + all_name, + lambda x=image_dir, y=gt_dir: load_sem_seg( + y, x, gt_ext="png", image_ext="jpg" + ), + ) + MetadataCatalog.get(all_name).set( + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + **meta, + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_coco_stuff_164k(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_mapillary_vistas_panoptic.py b/mask_adapter/data/datasets/register_mapillary_vistas_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..e89948fdd9e6051ff47215f46374e7981cc1a7ae --- /dev/null +++ b/mask_adapter/data/datasets/register_mapillary_vistas_panoptic.py @@ -0,0 +1,188 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_mapillary_vistas_panoptic.py +""" + +import json +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.utils.file_io import PathManager + +from . import openseg_classes + +MAPILLARY_VISTAS_SEM_SEG_CATEGORIES = openseg_classes.get_mapillary_vistas_categories_with_prompt_eng() + +def load_mapillary_vistas_panoptic_json(json_file, image_dir, gt_dir, semseg_dir, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/coco/train2017". + gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017". + json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json". + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = True + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = False + return segment_info + + with PathManager.open(json_file) as f: + json_info = json.load(f) + + ret = [] + for ann in json_info["annotations"]: + image_id = ann["image_id"] + # TODO: currently we assume image and label has the same filename but + # different extension, and images have extension ".jpg" for COCO. 
Need + # to make image extension a user-provided argument if we extend this + # function to support other COCO-like datasets. + image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg") + label_file = os.path.join(gt_dir, ann["file_name"]) + sem_label_file = os.path.join(semseg_dir, ann["file_name"]) + segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]] + ret.append( + { + "file_name": image_file, + "image_id": image_id, + "pan_seg_file_name": label_file, + "sem_seg_file_name": sem_label_file, + "segments_info": segments_info, + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"] + assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"] + assert PathManager.isfile(ret[0]["sem_seg_file_name"]), ret[0]["sem_seg_file_name"] + return ret + + +def register_mapillary_vistas_panoptic( + name, metadata, image_root, panoptic_root, semantic_root, panoptic_json, instances_json=None +): + """ + Register a "standard" version of ADE20k panoptic segmentation dataset named `name`. + The dictionaries in this registered dataset follows detectron2's standard format. + Hence it's called "standard". + Args: + name (str): the name that identifies a dataset, + e.g. "ade20k_panoptic_train" + metadata (dict): extra metadata associated with this dataset. + image_root (str): directory which contains all the images + panoptic_root (str): directory which contains panoptic annotation images in COCO format + panoptic_json (str): path to the json panoptic annotation file in COCO format + sem_seg_root (none): not used, to be consistent with + `register_coco_panoptic_separated`. + instances_json (str): path to the json instance annotation file + """ + panoptic_name = name + DatasetCatalog.register( + panoptic_name, + lambda: load_mapillary_vistas_panoptic_json( + panoptic_json, image_root, panoptic_root, semantic_root, metadata + ), + ) + MetadataCatalog.get(panoptic_name).set( + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + json_file=instances_json, + evaluator_type="mapillary_vistas_panoptic_seg", + ignore_label=65, # different from other datasets, Mapillary Vistas sets ignore_label to 65 + label_divisor=1000, + **metadata, + ) + + +_PREDEFINED_SPLITS_ADE20K_PANOPTIC = { + "openvocab_mapillary_vistas_panoptic_train": ( + "mapillary_vistas/training/images", + "mapillary_vistas/training/panoptic", + "mapillary_vistas/training/panoptic/panoptic_2018.json", + "mapillary_vistas/training/labels", + ), + "openvocab_mapillary_vistas_panoptic_val": ( + "mapillary_vistas/validation/images", + "mapillary_vistas/validation/panoptic", + "mapillary_vistas/validation/panoptic/panoptic_2018.json", + "mapillary_vistas/validation/labels", + ), +} + + +def get_metadata(): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. 
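Worth noting while reading these registrations: the ignore label is dataset-specific, 65 for Mapillary Vistas (set above), 255 for ADE20K-150 and most other splits, and 65535 for splits whose ground truth is stored as 16-bit TIFF (ADE20K-full, Pascal Context 459). A hedged check, assuming the registration modules in this diff have been imported:

# Sketch only: inspect the per-dataset ignore labels attached at registration time.
from detectron2.data import MetadataCatalog

for name in (
    "openvocab_mapillary_vistas_panoptic_val",   # 65
    "openvocab_ade20k_panoptic_val",             # 255
    "openvocab_ade20k_full_sem_seg_val",         # 65535 (16-bit TIFF gt)
):
    print(name, MetadataCatalog.get(name).ignore_label)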
+ thing_classes = [k["name"] for k in MAPILLARY_VISTAS_SEM_SEG_CATEGORIES] + thing_colors = [k["color"] for k in MAPILLARY_VISTAS_SEM_SEG_CATEGORIES] + stuff_classes = [k["name"] for k in MAPILLARY_VISTAS_SEM_SEG_CATEGORIES] + stuff_colors = [k["color"] for k in MAPILLARY_VISTAS_SEM_SEG_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # Convert category id for training: + # category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the linear + # softmax classifier. + thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for i, cat in enumerate(MAPILLARY_VISTAS_SEM_SEG_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + # else: + # stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + # in order to use sem_seg evaluator + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + return meta + + +def register_all_mapillary_vistas_panoptic(root): + metadata = get_metadata() + for ( + prefix, + (image_root, panoptic_root, panoptic_json, semantic_root), + ) in _PREDEFINED_SPLITS_ADE20K_PANOPTIC.items(): + # The "standard" version of COCO panoptic segmentation dataset, + # e.g. used by Panoptic-DeepLab + register_mapillary_vistas_panoptic( + prefix, + metadata, + os.path.join(root, image_root), + os.path.join(root, panoptic_root), + os.path.join(root, semantic_root), + os.path.join(root, panoptic_json), + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_mapillary_vistas_panoptic(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_objects365.py b/mask_adapter/data/datasets/register_objects365.py new file mode 100644 index 0000000000000000000000000000000000000000..8e8ce99cecb2ed83723d1313f6e4d72bd6b13d95 --- /dev/null +++ b/mask_adapter/data/datasets/register_objects365.py @@ -0,0 +1,409 @@ +from detectron2.data.datasets.register_coco import register_coco_instances +import os + +categories = [ +{'id': 1, 'name': 'person'}, + {'id': 2, 'name': 'sneakers'}, + {'id': 3, 'name': 'chair'}, + {'id': 4, 'name': 'hat'}, + {'id': 5, 'name': 'lamp'}, + {'id': 6, 'name': 'bottle'}, + {'id': 7, 'name': 'cabinet/shelf'}, + {'id': 8, 'name': 'cup'}, + {'id': 9, 'name': 'car'}, + {'id': 10, 'name': 'glasses'}, + {'id': 11, 'name': 'picture/frame'}, + {'id': 12, 'name': 'desk'}, + {'id': 13, 'name': 'handbag'}, + {'id': 14, 'name': 'street lights'}, + {'id': 15, 'name': 'book'}, + {'id': 16, 'name': 'plate'}, + {'id': 17, 'name': 'helmet'}, + {'id': 18, 'name': 'leather shoes'}, + {'id': 19, 'name': 'pillow'}, + {'id': 20, 'name': 'glove'}, + {'id': 21, 'name': 'potted plant'}, + {'id': 22, 'name': 'bracelet'}, + {'id': 23, 'name': 'flower'}, + {'id': 24, 'name': 'tv'}, + {'id': 25, 'name': 'storage box'}, + {'id': 26, 'name': 'vase'}, + {'id': 27, 'name': 'bench'}, + {'id': 28, 'name': 'wine glass'}, + {'id': 29, 'name': 'boots'}, + {'id': 30, 'name': 'bowl'}, + {'id': 31, 'name': 'dining table'}, + 
{'id': 32, 'name': 'umbrella'}, + {'id': 33, 'name': 'boat'}, + {'id': 34, 'name': 'flag'}, + {'id': 35, 'name': 'speaker'}, + {'id': 36, 'name': 'trash bin/can'}, + {'id': 37, 'name': 'stool'}, + {'id': 38, 'name': 'backpack'}, + {'id': 39, 'name': 'couch'}, + {'id': 40, 'name': 'belt'}, + {'id': 41, 'name': 'carpet'}, + {'id': 42, 'name': 'basket'}, + {'id': 43, 'name': 'towel/napkin'}, + {'id': 44, 'name': 'slippers'}, + {'id': 45, 'name': 'barrel/bucket'}, + {'id': 46, 'name': 'coffee table'}, + {'id': 47, 'name': 'suv'}, + {'id': 48, 'name': 'toy'}, + {'id': 49, 'name': 'tie'}, + {'id': 50, 'name': 'bed'}, + {'id': 51, 'name': 'traffic light'}, + {'id': 52, 'name': 'pen/pencil'}, + {'id': 53, 'name': 'microphone'}, + {'id': 54, 'name': 'sandals'}, + {'id': 55, 'name': 'canned'}, + {'id': 56, 'name': 'necklace'}, + {'id': 57, 'name': 'mirror'}, + {'id': 58, 'name': 'faucet'}, + {'id': 59, 'name': 'bicycle'}, + {'id': 60, 'name': 'bread'}, + {'id': 61, 'name': 'high heels'}, + {'id': 62, 'name': 'ring'}, + {'id': 63, 'name': 'van'}, + {'id': 64, 'name': 'watch'}, + {'id': 65, 'name': 'sink'}, + {'id': 66, 'name': 'horse'}, + {'id': 67, 'name': 'fish'}, + {'id': 68, 'name': 'apple'}, + {'id': 69, 'name': 'camera'}, + {'id': 70, 'name': 'candle'}, + {'id': 71, 'name': 'teddy bear'}, + {'id': 72, 'name': 'cake'}, + {'id': 73, 'name': 'motorcycle'}, + {'id': 74, 'name': 'wild bird'}, + {'id': 75, 'name': 'laptop'}, + {'id': 76, 'name': 'knife'}, + {'id': 77, 'name': 'traffic sign'}, + {'id': 78, 'name': 'cell phone'}, + {'id': 79, 'name': 'paddle'}, + {'id': 80, 'name': 'truck'}, + {'id': 81, 'name': 'cow'}, + {'id': 82, 'name': 'power outlet'}, + {'id': 83, 'name': 'clock'}, + {'id': 84, 'name': 'drum'}, + {'id': 85, 'name': 'fork'}, + {'id': 86, 'name': 'bus'}, + {'id': 87, 'name': 'hanger'}, + {'id': 88, 'name': 'nightstand'}, + {'id': 89, 'name': 'pot/pan'}, + {'id': 90, 'name': 'sheep'}, + {'id': 91, 'name': 'guitar'}, + {'id': 92, 'name': 'traffic cone'}, + {'id': 93, 'name': 'tea pot'}, + {'id': 94, 'name': 'keyboard'}, + {'id': 95, 'name': 'tripod'}, + {'id': 96, 'name': 'hockey'}, + {'id': 97, 'name': 'fan'}, + {'id': 98, 'name': 'dog'}, + {'id': 99, 'name': 'spoon'}, + {'id': 100, 'name': 'blackboard/whiteboard'}, + {'id': 101, 'name': 'balloon'}, + {'id': 102, 'name': 'air conditioner'}, + {'id': 103, 'name': 'cymbal'}, + {'id': 104, 'name': 'mouse'}, + {'id': 105, 'name': 'telephone'}, + {'id': 106, 'name': 'pickup truck'}, + {'id': 107, 'name': 'orange'}, + {'id': 108, 'name': 'banana'}, + {'id': 109, 'name': 'airplane'}, + {'id': 110, 'name': 'luggage'}, + {'id': 111, 'name': 'skis'}, + {'id': 112, 'name': 'soccer'}, + {'id': 113, 'name': 'trolley'}, + {'id': 114, 'name': 'oven'}, + {'id': 115, 'name': 'remote'}, + {'id': 116, 'name': 'baseball glove'}, + {'id': 117, 'name': 'paper towel'}, + {'id': 118, 'name': 'refrigerator'}, + {'id': 119, 'name': 'train'}, + {'id': 120, 'name': 'tomato'}, + {'id': 121, 'name': 'machinery vehicle'}, + {'id': 122, 'name': 'tent'}, + {'id': 123, 'name': 'shampoo/shower gel'}, + {'id': 124, 'name': 'head phone'}, + {'id': 125, 'name': 'lantern'}, + {'id': 126, 'name': 'donut'}, + {'id': 127, 'name': 'cleaning products'}, + {'id': 128, 'name': 'sailboat'}, + {'id': 129, 'name': 'tangerine'}, + {'id': 130, 'name': 'pizza'}, + {'id': 131, 'name': 'kite'}, + {'id': 132, 'name': 'computer box'}, + {'id': 133, 'name': 'elephant'}, + {'id': 134, 'name': 'toiletries'}, + {'id': 135, 'name': 'gas stove'}, + {'id': 136, 'name': 'broccoli'}, + {'id': 
137, 'name': 'toilet'}, + {'id': 138, 'name': 'stroller'}, + {'id': 139, 'name': 'shovel'}, + {'id': 140, 'name': 'baseball bat'}, + {'id': 141, 'name': 'microwave'}, + {'id': 142, 'name': 'skateboard'}, + {'id': 143, 'name': 'surfboard'}, + {'id': 144, 'name': 'surveillance camera'}, + {'id': 145, 'name': 'gun'}, + {'id': 146, 'name': 'life saver'}, + {'id': 147, 'name': 'cat'}, + {'id': 148, 'name': 'lemon'}, + {'id': 149, 'name': 'liquid soap'}, + {'id': 150, 'name': 'zebra'}, + {'id': 151, 'name': 'duck'}, + {'id': 152, 'name': 'sports car'}, + {'id': 153, 'name': 'giraffe'}, + {'id': 154, 'name': 'pumpkin'}, + {'id': 155, 'name': 'piano'}, + {'id': 156, 'name': 'stop sign'}, + {'id': 157, 'name': 'radiator'}, + {'id': 158, 'name': 'converter'}, + {'id': 159, 'name': 'tissue '}, + {'id': 160, 'name': 'carrot'}, + {'id': 161, 'name': 'washing machine'}, + {'id': 162, 'name': 'vent'}, + {'id': 163, 'name': 'cookies'}, + {'id': 164, 'name': 'cutting/chopping board'}, + {'id': 165, 'name': 'tennis racket'}, + {'id': 166, 'name': 'candy'}, + {'id': 167, 'name': 'skating and skiing shoes'}, + {'id': 168, 'name': 'scissors'}, + {'id': 169, 'name': 'folder'}, + {'id': 170, 'name': 'baseball'}, + {'id': 171, 'name': 'strawberry'}, + {'id': 172, 'name': 'bow tie'}, + {'id': 173, 'name': 'pigeon'}, + {'id': 174, 'name': 'pepper'}, + {'id': 175, 'name': 'coffee machine'}, + {'id': 176, 'name': 'bathtub'}, + {'id': 177, 'name': 'snowboard'}, + {'id': 178, 'name': 'suitcase'}, + {'id': 179, 'name': 'grapes'}, + {'id': 180, 'name': 'ladder'}, + {'id': 181, 'name': 'pear'}, + {'id': 182, 'name': 'american football'}, + {'id': 183, 'name': 'basketball'}, + {'id': 184, 'name': 'potato'}, + {'id': 185, 'name': 'paint brush'}, + {'id': 186, 'name': 'printer'}, + {'id': 187, 'name': 'billiards'}, + {'id': 188, 'name': 'fire hydrant'}, + {'id': 189, 'name': 'goose'}, + {'id': 190, 'name': 'projector'}, + {'id': 191, 'name': 'sausage'}, + {'id': 192, 'name': 'fire extinguisher'}, + {'id': 193, 'name': 'extension cord'}, + {'id': 194, 'name': 'facial mask'}, + {'id': 195, 'name': 'tennis ball'}, + {'id': 196, 'name': 'chopsticks'}, + {'id': 197, 'name': 'electronic stove and gas stove'}, + {'id': 198, 'name': 'pie'}, + {'id': 199, 'name': 'frisbee'}, + {'id': 200, 'name': 'kettle'}, + {'id': 201, 'name': 'hamburger'}, + {'id': 202, 'name': 'golf club'}, + {'id': 203, 'name': 'cucumber'}, + {'id': 204, 'name': 'clutch'}, + {'id': 205, 'name': 'blender'}, + {'id': 206, 'name': 'tong'}, + {'id': 207, 'name': 'slide'}, + {'id': 208, 'name': 'hot dog'}, + {'id': 209, 'name': 'toothbrush'}, + {'id': 210, 'name': 'facial cleanser'}, + {'id': 211, 'name': 'mango'}, + {'id': 212, 'name': 'deer'}, + {'id': 213, 'name': 'egg'}, + {'id': 214, 'name': 'violin'}, + {'id': 215, 'name': 'marker'}, + {'id': 216, 'name': 'ship'}, + {'id': 217, 'name': 'chicken'}, + {'id': 218, 'name': 'onion'}, + {'id': 219, 'name': 'ice cream'}, + {'id': 220, 'name': 'tape'}, + {'id': 221, 'name': 'wheelchair'}, + {'id': 222, 'name': 'plum'}, + {'id': 223, 'name': 'bar soap'}, + {'id': 224, 'name': 'scale'}, + {'id': 225, 'name': 'watermelon'}, + {'id': 226, 'name': 'cabbage'}, + {'id': 227, 'name': 'router/modem'}, + {'id': 228, 'name': 'golf ball'}, + {'id': 229, 'name': 'pine apple'}, + {'id': 230, 'name': 'crane'}, + {'id': 231, 'name': 'fire truck'}, + {'id': 232, 'name': 'peach'}, + {'id': 233, 'name': 'cello'}, + {'id': 234, 'name': 'notepaper'}, + {'id': 235, 'name': 'tricycle'}, + {'id': 236, 'name': 'toaster'}, + {'id': 237, 'name': 
'helicopter'}, + {'id': 238, 'name': 'green beans'}, + {'id': 239, 'name': 'brush'}, + {'id': 240, 'name': 'carriage'}, + {'id': 241, 'name': 'cigar'}, + {'id': 242, 'name': 'earphone'}, + {'id': 243, 'name': 'penguin'}, + {'id': 244, 'name': 'hurdle'}, + {'id': 245, 'name': 'swing'}, + {'id': 246, 'name': 'radio'}, + {'id': 247, 'name': 'CD'}, + {'id': 248, 'name': 'parking meter'}, + {'id': 249, 'name': 'swan'}, + {'id': 250, 'name': 'garlic'}, + {'id': 251, 'name': 'french fries'}, + {'id': 252, 'name': 'horn'}, + {'id': 253, 'name': 'avocado'}, + {'id': 254, 'name': 'saxophone'}, + {'id': 255, 'name': 'trumpet'}, + {'id': 256, 'name': 'sandwich'}, + {'id': 257, 'name': 'cue'}, + {'id': 258, 'name': 'kiwi fruit'}, + {'id': 259, 'name': 'bear'}, + {'id': 260, 'name': 'fishing rod'}, + {'id': 261, 'name': 'cherry'}, + {'id': 262, 'name': 'tablet'}, + {'id': 263, 'name': 'green vegetables'}, + {'id': 264, 'name': 'nuts'}, + {'id': 265, 'name': 'corn'}, + {'id': 266, 'name': 'key'}, + {'id': 267, 'name': 'screwdriver'}, + {'id': 268, 'name': 'globe'}, + {'id': 269, 'name': 'broom'}, + {'id': 270, 'name': 'pliers'}, + {'id': 271, 'name': 'volleyball'}, + {'id': 272, 'name': 'hammer'}, + {'id': 273, 'name': 'eggplant'}, + {'id': 274, 'name': 'trophy'}, + {'id': 275, 'name': 'dates'}, + {'id': 276, 'name': 'board eraser'}, + {'id': 277, 'name': 'rice'}, + {'id': 278, 'name': 'tape measure/ruler'}, + {'id': 279, 'name': 'dumbbell'}, + {'id': 280, 'name': 'hamimelon'}, + {'id': 281, 'name': 'stapler'}, + {'id': 282, 'name': 'camel'}, + {'id': 283, 'name': 'lettuce'}, + {'id': 284, 'name': 'goldfish'}, + {'id': 285, 'name': 'meat balls'}, + {'id': 286, 'name': 'medal'}, + {'id': 287, 'name': 'toothpaste'}, + {'id': 288, 'name': 'antelope'}, + {'id': 289, 'name': 'shrimp'}, + {'id': 290, 'name': 'rickshaw'}, + {'id': 291, 'name': 'trombone'}, + {'id': 292, 'name': 'pomegranate'}, + {'id': 293, 'name': 'coconut'}, + {'id': 294, 'name': 'jellyfish'}, + {'id': 295, 'name': 'mushroom'}, + {'id': 296, 'name': 'calculator'}, + {'id': 297, 'name': 'treadmill'}, + {'id': 298, 'name': 'butterfly'}, + {'id': 299, 'name': 'egg tart'}, + {'id': 300, 'name': 'cheese'}, + {'id': 301, 'name': 'pig'}, + {'id': 302, 'name': 'pomelo'}, + {'id': 303, 'name': 'race car'}, + {'id': 304, 'name': 'rice cooker'}, + {'id': 305, 'name': 'tuba'}, + {'id': 306, 'name': 'crosswalk sign'}, + {'id': 307, 'name': 'papaya'}, + {'id': 308, 'name': 'hair drier'}, + {'id': 309, 'name': 'green onion'}, + {'id': 310, 'name': 'chips'}, + {'id': 311, 'name': 'dolphin'}, + {'id': 312, 'name': 'sushi'}, + {'id': 313, 'name': 'urinal'}, + {'id': 314, 'name': 'donkey'}, + {'id': 315, 'name': 'electric drill'}, + {'id': 316, 'name': 'spring rolls'}, + {'id': 317, 'name': 'tortoise/turtle'}, + {'id': 318, 'name': 'parrot'}, + {'id': 319, 'name': 'flute'}, + {'id': 320, 'name': 'measuring cup'}, + {'id': 321, 'name': 'shark'}, + {'id': 322, 'name': 'steak'}, + {'id': 323, 'name': 'poker card'}, + {'id': 324, 'name': 'binoculars'}, + {'id': 325, 'name': 'llama'}, + {'id': 326, 'name': 'radish'}, + {'id': 327, 'name': 'noodles'}, + {'id': 328, 'name': 'yak'}, + {'id': 329, 'name': 'mop'}, + {'id': 330, 'name': 'crab'}, + {'id': 331, 'name': 'microscope'}, + {'id': 332, 'name': 'barbell'}, + {'id': 333, 'name': 'bread/bun'}, + {'id': 334, 'name': 'baozi'}, + {'id': 335, 'name': 'lion'}, + {'id': 336, 'name': 'red cabbage'}, + {'id': 337, 'name': 'polar bear'}, + {'id': 338, 'name': 'lighter'}, + {'id': 339, 'name': 'seal'}, + {'id': 340, 'name': 
'mangosteen'}, + {'id': 341, 'name': 'comb'}, + {'id': 342, 'name': 'eraser'}, + {'id': 343, 'name': 'pitaya'}, + {'id': 344, 'name': 'scallop'}, + {'id': 345, 'name': 'pencil case'}, + {'id': 346, 'name': 'saw'}, + {'id': 347, 'name': 'table tennis paddle'}, + {'id': 348, 'name': 'okra'}, + {'id': 349, 'name': 'starfish'}, + {'id': 350, 'name': 'eagle'}, + {'id': 351, 'name': 'monkey'}, + {'id': 352, 'name': 'durian'}, + {'id': 353, 'name': 'game board'}, + {'id': 354, 'name': 'rabbit'}, + {'id': 355, 'name': 'french horn'}, + {'id': 356, 'name': 'ambulance'}, + {'id': 357, 'name': 'asparagus'}, + {'id': 358, 'name': 'hoverboard'}, + {'id': 359, 'name': 'pasta'}, + {'id': 360, 'name': 'target'}, + {'id': 361, 'name': 'hotair balloon'}, + {'id': 362, 'name': 'chainsaw'}, + {'id': 363, 'name': 'lobster'}, + {'id': 364, 'name': 'iron'}, + {'id': 365, 'name': 'flashlight'}] + +def _get_builtin_metadata_obj365v1(): + id_to_name = {x['id']: x['name'] for x in categories} + thing_dataset_id_to_contiguous_id = {i + 1: i for i in range(365)} + thing_classes = [id_to_name[k] for k in sorted(id_to_name)] + return { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes} + +_PREDEFINED_SPLITS_OBJECTS365 = { + "objects365_train": ("objects365/train", "objects365/annotations/objects365_train.json"), + "objects365_val": ("objects365/val", "objects365/annotations/objects365_val.json"), +} + +# for key, (image_root, json_file) in _PREDEFINED_SPLITS_OBJECTS365.items(): +# register_coco_instances( +# key, +# _get_builtin_metadata(), +# os.path.join("datasets", json_file) if "://" not in json_file else json_file, +# os.path.join("datasets", image_root), +# ) + +_PREDEFINED_SPLITS_OBJECTS365V1 = { + "objects365_v1_train": ("Objects365v1/train", "Objects365v1/objects365_train.json"), + "objects365_v1_masktrain": ("Objects365v1/train", "Objects365v1/annotations/filtered_objects365_v1_train_with_mask.json"), + "objects365_v1_val": ("Objects365v1/val/val", "Objects365v1/objects365_val.json"), + "objects365_v1_val_mini": ("Objects365v1/val/val", "Objects365v1/objects365_val_mini.json"), +} + +def register_all_obj365v1(root): + for key, (image_root, json_file) in _PREDEFINED_SPLITS_OBJECTS365V1.items(): + register_coco_instances( + key, + _get_builtin_metadata_obj365v1(), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_obj365v1(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_pascal_ctx_459_sem_seg.py b/mask_adapter/data/datasets/register_pascal_ctx_459_sem_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..a6f2d2132d4ceec003c75e2f0c9ae6682fbcb315 --- /dev/null +++ b/mask_adapter/data/datasets/register_pascal_ctx_459_sem_seg.py @@ -0,0 +1,81 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
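The Objects365 registration above delegates entirely to detectron2's register_coco_instances helper; the sketch below restates that pattern with toy metadata and hypothetical paths (nothing in it is part of this diff):

# Toy example of the register_coco_instances pattern used above.
import os
from detectron2.data.datasets import register_coco_instances

toy_meta = {
    "thing_dataset_id_to_contiguous_id": {1: 0, 2: 1},   # dataset id -> [0, #classes)
    "thing_classes": ["person", "sneakers"],
}
root = os.getenv("DETECTRON2_DATASETS", "datasets")
register_coco_instances(
    "toy_objects365_val",                         # name placed in the catalogs
    toy_meta,                                     # extra metadata
    os.path.join(root, "toy/annotations.json"),   # COCO-format json (hypothetical path)
    os.path.join(root, "toy/images"),             # image root (hypothetical path)
)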
+""" + +import os + +import numpy as np + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg + +from . import openseg_classes + +PASCAL_CTX_459_CATEGORIES=openseg_classes.get_pascal_ctx_459_categories_with_prompt_eng() + +PASCAL_CTX_459_COLORS = [k["color"] for k in PASCAL_CTX_459_CATEGORIES] + +MetadataCatalog.get("openvocab_pascal_ctx459_sem_seg_train").set( + stuff_colors=PASCAL_CTX_459_COLORS[:], +) + +MetadataCatalog.get("openvocab_pascal_ctx459_sem_seg_val").set( + stuff_colors=PASCAL_CTX_459_COLORS[:], +) + +def _get_ctx459_meta(): + # Id 0 is reserved for ignore_label, we change ignore_label for 0 + # to 255 in our pre-processing, so all ids are shifted by 1. + stuff_ids = [k["id"] for k in PASCAL_CTX_459_CATEGORIES] + assert len(stuff_ids) == 459, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 91], used in models) to ids in the dataset (used for processing results) + stuff_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(stuff_ids)} + stuff_classes = [k["name"] for k in PASCAL_CTX_459_CATEGORIES] + + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + } + return ret + + +def register_all_ctx459(root): + root = os.path.join(root, "pascal_ctx_d2") + meta = _get_ctx459_meta() + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_ctx459", dirname) + name = f"openvocab_pascal_ctx459_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="tif", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=meta["stuff_classes"][:], + thing_dataset_id_to_contiguous_id={}, # to make Mask2Former happy + stuff_dataset_id_to_contiguous_id=meta["stuff_dataset_id_to_contiguous_id"], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=65535, # NOTE: gt is saved in 16-bit TIFF images + gt_ext="tif", + ) + + + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_ctx459(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_pascal_ctx_59_sem_seg.py b/mask_adapter/data/datasets/register_pascal_ctx_59_sem_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..6f3bfbf0b9ce94677caaed93c6a4cccf7c38d8c8 --- /dev/null +++ b/mask_adapter/data/datasets/register_pascal_ctx_59_sem_seg.py @@ -0,0 +1,78 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import os + +import numpy as np + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg + +from . 
import openseg_classes + +PASCAL_CTX_59_CATEGORIES=openseg_classes.get_pascal_ctx_59_categories_with_prompt_eng() + +PASCAL_CTX_59_COLORS = [k["color"] for k in PASCAL_CTX_59_CATEGORIES] + +MetadataCatalog.get("openvocab_pascal_ctx59_sem_seg_train").set( + stuff_colors=PASCAL_CTX_59_COLORS[:], +) + +MetadataCatalog.get("openvocab_pascal_ctx59_sem_seg_val").set( + stuff_colors=PASCAL_CTX_59_COLORS[:], +) + +def _get_ctx59_meta(): + # Id 0 is reserved for ignore_label, we change ignore_label for 0 + # to 255 in our pre-processing, so all ids are shifted by 1. + stuff_ids = [k["id"] for k in PASCAL_CTX_59_CATEGORIES] + assert len(stuff_ids) == 59, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 91], used in models) to ids in the dataset (used for processing results) + stuff_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(stuff_ids)} + stuff_classes = [k["name"] for k in PASCAL_CTX_59_CATEGORIES] + + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + } + return ret + + +def register_all_ctx59(root): + root = os.path.join(root, "pascal_ctx_d2") + meta = _get_ctx59_meta() + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_ctx59", dirname) + name = f"openvocab_pascal_ctx59_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=meta["stuff_classes"][:], + thing_dataset_id_to_contiguous_id={}, # to make Mask2Former happy + stuff_dataset_id_to_contiguous_id=meta["stuff_dataset_id_to_contiguous_id"], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + gt_ext="png", + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_ctx59(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_pascal_voc_20_semantic.py b/mask_adapter/data/datasets/register_pascal_voc_20_semantic.py new file mode 100644 index 0000000000000000000000000000000000000000..2e8d24954a4523a0fa26fb2c6bccbccef0583a89 --- /dev/null +++ b/mask_adapter/data/datasets/register_pascal_voc_20_semantic.py @@ -0,0 +1,80 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import os + +import numpy as np + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg + +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from . 
import openseg_classes + +PASCAL_VOC_20_CATEGORIES = openseg_classes.get_pascal_21_categories_with_prompt_eng()[1:] # remove background + +PASCAL_VOC_20_COLORS = [k["color"] for k in PASCAL_VOC_20_CATEGORIES] + +MetadataCatalog.get("openvocab_pascal20_sem_seg_train").set( + stuff_colors=PASCAL_VOC_20_COLORS[:], +) + +MetadataCatalog.get("openvocab_pascal20_sem_seg_val").set( + stuff_colors=PASCAL_VOC_20_COLORS[:], +) + + +def _get_pascal20_meta(): + # Id 0 is reserved for ignore_label, we change ignore_label for 0 + # to 255 in our pre-processing, so all ids are shifted by 1. + stuff_ids = [k["id"] for k in PASCAL_VOC_20_CATEGORIES] + assert len(stuff_ids) == 20, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 91], used in models) to ids in the dataset (used for processing results) + stuff_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(stuff_ids)} + stuff_classes = [k["name"] for k in PASCAL_VOC_20_CATEGORIES] + + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + } + return ret + + +def register_all_pascal20(root): + root = os.path.join(root, "pascal_voc_d2") + meta = _get_pascal20_meta() + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_pascal20", dirname) + name = f"openvocab_pascal20_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=meta["stuff_classes"][:], + thing_dataset_id_to_contiguous_id={}, # to make Mask2Former happy + stuff_dataset_id_to_contiguous_id=meta["stuff_dataset_id_to_contiguous_id"], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + gt_ext="png", + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_pascal20(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_pascal_voc_21_semantic.py b/mask_adapter/data/datasets/register_pascal_voc_21_semantic.py new file mode 100644 index 0000000000000000000000000000000000000000..b059b654956f47942252b2bb57f78878ed487343 --- /dev/null +++ b/mask_adapter/data/datasets/register_pascal_voc_21_semantic.py @@ -0,0 +1,79 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import os + +import numpy as np + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg + +from . 
import openseg_classes + +PASCAL_VOC_21_CATEGORIES = openseg_classes.get_pascal_21_categories_with_prompt_eng() + +PASCAL_VOC_21_COLORS = [k["color"] for k in PASCAL_VOC_21_CATEGORIES] + +MetadataCatalog.get("openvocab_pascal21_sem_seg_train").set( + stuff_colors=PASCAL_VOC_21_COLORS[:], +) + +MetadataCatalog.get("openvocab_pascal21_sem_seg_val").set( + stuff_colors=PASCAL_VOC_21_COLORS[:], +) + + +def _get_pascal21_meta(): + # Id 0 is reserved for ignore_label, we change ignore_label for 0 + # to 255 in our pre-processing, so all ids are shifted by 1. + stuff_ids = [k["id"] for k in PASCAL_VOC_21_CATEGORIES] + assert len(stuff_ids) == 21, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 91], used in models) to ids in the dataset (used for processing results) + stuff_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(stuff_ids)} + stuff_classes = [k["name"] for k in PASCAL_VOC_21_CATEGORIES] + + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + } + return ret + + +def register_all_pascal21(root): + root = os.path.join(root, "pascal_voc_d2") + meta = _get_pascal21_meta() + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_pascal21", dirname) + name = f"openvocab_pascal21_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=meta["stuff_classes"][:], + thing_dataset_id_to_contiguous_id={}, # to make Mask2Former happy + stuff_dataset_id_to_contiguous_id=meta["stuff_dataset_id_to_contiguous_id"], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + gt_ext="png", + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_pascal21(_root) \ No newline at end of file diff --git a/mask_adapter/evaluation/__init__.py b/mask_adapter/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be75f0cd9568f901b3174ecfb43c0b9f4fa1f77d --- /dev/null +++ b/mask_adapter/evaluation/__init__.py @@ -0,0 +1,15 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
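A small usage sketch (not part of the diff) of the dataset registrations above, assuming detectron2 and this repo's `mask_adapter` package are importable. Importing a register module is what populates the catalogs; no image files are needed just to read the metadata.

from detectron2.data import MetadataCatalog

# Importing the module runs register_all_ctx459() at the bottom of that file.
import mask_adapter.data.datasets.register_pascal_ctx_459_sem_seg  # noqa: F401

meta = MetadataCatalog.get("openvocab_pascal_ctx459_sem_seg_val")
print(len(meta.stuff_classes))   # 459 class-name strings (with prompt-engineered synonyms)
print(meta.ignore_label)         # 65535, since the ground truth is stored as 16-bit TIFF
print(meta.evaluator_type)       # 'sem_seg'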
+""" \ No newline at end of file diff --git a/mask_adapter/evaluation/__pycache__/__init__.cpython-310.pyc b/mask_adapter/evaluation/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8296131041a43a81a30c0e0d11f34ac6b219a745 Binary files /dev/null and b/mask_adapter/evaluation/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/evaluation/__pycache__/__init__.cpython-38.pyc b/mask_adapter/evaluation/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e2dd3bb089a8ab0786e664ceba24120832f25f5 Binary files /dev/null and b/mask_adapter/evaluation/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/evaluation/__pycache__/coco_panoptic_evaluation.cpython-310.pyc b/mask_adapter/evaluation/__pycache__/coco_panoptic_evaluation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..573614979797c48e4f853b4e9eb46e830924d737 Binary files /dev/null and b/mask_adapter/evaluation/__pycache__/coco_panoptic_evaluation.cpython-310.pyc differ diff --git a/mask_adapter/evaluation/__pycache__/coco_panoptic_evaluation.cpython-38.pyc b/mask_adapter/evaluation/__pycache__/coco_panoptic_evaluation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87c42d88f3843880910926df7d7ec690653bfc33 Binary files /dev/null and b/mask_adapter/evaluation/__pycache__/coco_panoptic_evaluation.cpython-38.pyc differ diff --git a/mask_adapter/evaluation/__pycache__/instance_evaluation.cpython-310.pyc b/mask_adapter/evaluation/__pycache__/instance_evaluation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a581a5805f2c37af74dadf1eeaeb3b72832c7b8e Binary files /dev/null and b/mask_adapter/evaluation/__pycache__/instance_evaluation.cpython-310.pyc differ diff --git a/mask_adapter/evaluation/__pycache__/instance_evaluation.cpython-38.pyc b/mask_adapter/evaluation/__pycache__/instance_evaluation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aaa2cdb464bc34bb90a8e58021b3c28448b98dbb Binary files /dev/null and b/mask_adapter/evaluation/__pycache__/instance_evaluation.cpython-38.pyc differ diff --git a/mask_adapter/evaluation/instance_evaluation.py b/mask_adapter/evaluation/instance_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..738c08363a26f540bd850125bdb93c6372640bf1 --- /dev/null +++ b/mask_adapter/evaluation/instance_evaluation.py @@ -0,0 +1,113 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. 
+ +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/evaluation/instance_evaluation.py +""" + +import contextlib +import copy +import io +import itertools +import json +import logging +import numpy as np +import os +import pickle +from collections import OrderedDict +import pycocotools.mask as mask_util +import torch +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tabulate import tabulate + +import detectron2.utils.comm as comm +from detectron2.config import CfgNode +from detectron2.data import MetadataCatalog +from detectron2.data.datasets.coco import convert_to_coco_json +from detectron2.evaluation.coco_evaluation import COCOEvaluator, _evaluate_predictions_on_coco +from detectron2.evaluation.fast_eval_api import COCOeval_opt +from detectron2.structures import Boxes, BoxMode, pairwise_iou +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import create_small_table + + +# modified from COCOEvaluator for instance segmetnat +class InstanceSegEvaluator(COCOEvaluator): + """ + Evaluate AR for object proposals, AP for instance detection/segmentation, AP + for keypoint detection outputs using COCO's metrics. + See http://cocodataset.org/#detection-eval and + http://cocodataset.org/#keypoints-eval to understand its metrics. + The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means + the metric cannot be computed (e.g. due to no predictions made). + + In addition to COCO, this evaluator is able to support any bounding box detection, + instance segmentation, or keypoint detection dataset. + """ + + def _eval_predictions(self, predictions, img_ids=None): + """ + Evaluate predictions. Fill self._results with the metrics of the tasks. + """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + tasks = self._tasks or self._tasks_from_predictions(coco_results) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id + # all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) + # num_classes = len(all_contiguous_ids) + # assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1 + + reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} + for result in coco_results: + category_id = result["category_id"] + # assert category_id < num_classes, ( + # f"A prediction has class={category_id}, " + # f"but the dataset only has {num_classes} classes and " + # f"predicted class id should be in [0, {num_classes - 1}]." + # ) + assert category_id in reverse_id_mapping, ( + f"A prediction has class={category_id}, " + f"but the dataset only has class ids in {dataset_id_to_contiguous_id}." 
+ ) + result["category_id"] = reverse_id_mapping[category_id] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info( + "Evaluating predictions with {} COCO API...".format( + "unofficial" if self._use_fast_impl else "official" + ) + ) + for task in sorted(tasks): + assert task in {"bbox", "segm", "keypoints"}, f"Got unknown task: {task}!" + coco_eval = ( + _evaluate_predictions_on_coco( + self._coco_api, + coco_results, + task, + kpt_oks_sigmas=self._kpt_oks_sigmas, + #use_fast_impl=self._use_fast_impl, + img_ids=img_ids, + max_dets_per_image=self._max_dets_per_image, + ) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res diff --git a/mask_adapter/evaluation/panoptic_evaluation.py b/mask_adapter/evaluation/panoptic_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..ff3787d230e63edafce5013891c6873aa5d4ce90 --- /dev/null +++ b/mask_adapter/evaluation/panoptic_evaluation.py @@ -0,0 +1,274 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/cocodataset/panopticapi/blob/master/panopticapi/evaluation.py +Reference: https://github.com/open-mmlab/mmdetection/pull/7538 +""" + +#!/usr/bin/env python +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +import os, sys +import numpy as np +import json +import time +from datetime import timedelta +from collections import defaultdict +import argparse +import multiprocessing + +import PIL.Image as Image + +from panopticapi.utils import get_traceback, rgb2id + +OFFSET = 256 * 256 * 256 +VOID = 0 + +class PQStatCat(): + def __init__(self): + self.iou = 0.0 + self.tp = 0 + self.fp = 0 + self.fn = 0 + + def __iadd__(self, pq_stat_cat): + self.iou += pq_stat_cat.iou + self.tp += pq_stat_cat.tp + self.fp += pq_stat_cat.fp + self.fn += pq_stat_cat.fn + return self + + +class PQStat(): + def __init__(self): + self.pq_per_cat = defaultdict(PQStatCat) + + def __getitem__(self, i): + return self.pq_per_cat[i] + + def __iadd__(self, pq_stat): + for label, pq_stat_cat in pq_stat.pq_per_cat.items(): + self.pq_per_cat[label] += pq_stat_cat + return self + + def pq_average(self, categories, isthing): + pq, sq, rq, n = 0, 0, 0, 0 + per_class_results = {} + for label, label_info in categories.items(): + if isthing is not None: + cat_isthing = label_info['isthing'] == 1 + if isthing != cat_isthing: + continue + iou = self.pq_per_cat[label].iou + tp = self.pq_per_cat[label].tp + fp = self.pq_per_cat[label].fp + fn = self.pq_per_cat[label].fn + if tp + fp + fn == 0: + per_class_results[label] = {'pq': 0.0, 'sq': 0.0, 'rq': 0.0} + continue + n += 1 + pq_class = iou / (tp + 0.5 * fp + 0.5 * fn) + sq_class = iou / tp if tp != 0 else 0 + rq_class = tp / (tp + 0.5 * fp + 0.5 * fn) + per_class_results[label] = {'pq': pq_class, 'sq': sq_class, 'rq': rq_class} + pq += 
pq_class + sq += sq_class + rq += rq_class + + return {'pq': pq / n, 'sq': sq / n, 'rq': rq / n, 'n': n}, per_class_results + + +@get_traceback +def pq_compute_single_core(proc_id, annotation_set, gt_folder, pred_folder, categories): + pq_stat = PQStat() + + idx = 0 + for gt_ann, pred_ann in annotation_set: + if idx % 100 == 0: + print('Core: {}, {} from {} images processed'.format(proc_id, idx, len(annotation_set))) + idx += 1 + + pan_gt = np.array(Image.open(os.path.join(gt_folder, gt_ann['file_name'])), dtype=np.uint32) + pan_gt = rgb2id(pan_gt) + pan_pred = np.array(Image.open(os.path.join(pred_folder, pred_ann['file_name'])), dtype=np.uint32) + pan_pred = rgb2id(pan_pred) + + gt_segms = {el['id']: el for el in gt_ann['segments_info']} + pred_segms = {el['id']: el for el in pred_ann['segments_info']} + + # predicted segments area calculation + prediction sanity checks + pred_labels_set = set(el['id'] for el in pred_ann['segments_info']) + labels, labels_cnt = np.unique(pan_pred, return_counts=True) + for label, label_cnt in zip(labels, labels_cnt): + if label not in pred_segms: + if label == VOID: + continue + raise KeyError('In the image with ID {} segment with ID {} is presented in PNG and not presented in JSON.'.format(gt_ann['image_id'], label)) + pred_segms[label]['area'] = label_cnt + pred_labels_set.remove(label) + if pred_segms[label]['category_id'] not in categories: + raise KeyError('In the image with ID {} segment with ID {} has unknown category_id {}.'.format(gt_ann['image_id'], label, pred_segms[label]['category_id'])) + if len(pred_labels_set) != 0: + raise KeyError('In the image with ID {} the following segment IDs {} are presented in JSON and not presented in PNG.'.format(gt_ann['image_id'], list(pred_labels_set))) + + # confusion matrix calculation + pan_gt_pred = pan_gt.astype(np.uint64) * OFFSET + pan_pred.astype(np.uint64) + gt_pred_map = {} + labels, labels_cnt = np.unique(pan_gt_pred, return_counts=True) + for label, intersection in zip(labels, labels_cnt): + gt_id = label // OFFSET + pred_id = label % OFFSET + gt_pred_map[(gt_id, pred_id)] = intersection + + # count all matched pairs + gt_matched = set() + pred_matched = set() + for label_tuple, intersection in gt_pred_map.items(): + gt_label, pred_label = label_tuple + if gt_label not in gt_segms: + continue + if pred_label not in pred_segms: + continue + if gt_segms[gt_label]['iscrowd'] == 1: + continue + if gt_segms[gt_label]['category_id'] != pred_segms[pred_label]['category_id']: + continue + + union = pred_segms[pred_label]['area'] + gt_segms[gt_label]['area'] - intersection - gt_pred_map.get((VOID, pred_label), 0) + iou = intersection / union + if iou > 0.5: + pq_stat[gt_segms[gt_label]['category_id']].tp += 1 + pq_stat[gt_segms[gt_label]['category_id']].iou += iou + gt_matched.add(gt_label) + pred_matched.add(pred_label) + + # count false positives + crowd_labels_dict = {} + for gt_label, gt_info in gt_segms.items(): + if gt_label in gt_matched: + continue + # crowd segments are ignored + if gt_info['iscrowd'] == 1: + crowd_labels_dict[gt_info['category_id']] = gt_label + continue + pq_stat[gt_info['category_id']].fn += 1 + + # count false positives + for pred_label, pred_info in pred_segms.items(): + if pred_label in pred_matched: + continue + # intersection of the segment with VOID + intersection = gt_pred_map.get((VOID, pred_label), 0) + # plus intersection with corresponding CROWD region if it exists + if pred_info['category_id'] in crowd_labels_dict: + intersection += 
gt_pred_map.get((crowd_labels_dict[pred_info['category_id']], pred_label), 0) + # predicted segment is ignored if more than half of the segment correspond to VOID and CROWD regions + if intersection / pred_info['area'] > 0.5: + continue + pq_stat[pred_info['category_id']].fp += 1 + print('Core: {}, all {} images processed'.format(proc_id, len(annotation_set))) + return pq_stat + + +def pq_compute_multi_core(matched_annotations_list, gt_folder, pred_folder, categories): + cpu_num = multiprocessing.cpu_count() + annotations_split = np.array_split(matched_annotations_list, cpu_num) + print("Number of cores: {}, images per core: {}".format(cpu_num, len(annotations_split[0]))) + workers = multiprocessing.Pool(processes=cpu_num) + processes = [] + for proc_id, annotation_set in enumerate(annotations_split): + p = workers.apply_async(pq_compute_single_core, + (proc_id, annotation_set, gt_folder, pred_folder, categories)) + processes.append(p) + + # https://github.com/open-mmlab/mmdetection/pull/7538 + # Close the process pool, otherwise it will lead to memory + # leaking problems. + workers.close() + workers.join() + + + pq_stat = PQStat() + for p in processes: + pq_stat += p.get() + return pq_stat + + +def pq_compute(gt_json_file, pred_json_file, gt_folder=None, pred_folder=None): + + start_time = time.time() + with open(gt_json_file, 'r') as f: + gt_json = json.load(f) + with open(pred_json_file, 'r') as f: + pred_json = json.load(f) + + if gt_folder is None: + gt_folder = gt_json_file.replace('.json', '') + if pred_folder is None: + pred_folder = pred_json_file.replace('.json', '') + categories = {el['id']: el for el in gt_json['categories']} + + print("Evaluation panoptic segmentation metrics:") + print("Ground truth:") + print("\tSegmentation folder: {}".format(gt_folder)) + print("\tJSON file: {}".format(gt_json_file)) + print("Prediction:") + print("\tSegmentation folder: {}".format(pred_folder)) + print("\tJSON file: {}".format(pred_json_file)) + + if not os.path.isdir(gt_folder): + raise Exception("Folder {} with ground truth segmentations doesn't exist".format(gt_folder)) + if not os.path.isdir(pred_folder): + raise Exception("Folder {} with predicted segmentations doesn't exist".format(pred_folder)) + + pred_annotations = {el['image_id']: el for el in pred_json['annotations']} + matched_annotations_list = [] + for gt_ann in gt_json['annotations']: + image_id = gt_ann['image_id'] + if image_id not in pred_annotations: + raise Exception('no prediction for the image with id: {}'.format(image_id)) + matched_annotations_list.append((gt_ann, pred_annotations[image_id])) + + pq_stat = pq_compute_multi_core(matched_annotations_list, gt_folder, pred_folder, categories) + + metrics = [("All", None), ("Things", True), ("Stuff", False)] + results = {} + for name, isthing in metrics: + results[name], per_class_results = pq_stat.pq_average(categories, isthing=isthing) + if name == 'All': + results['per_class'] = per_class_results + print("{:10s}| {:>5s} {:>5s} {:>5s} {:>5s}".format("", "PQ", "SQ", "RQ", "N")) + print("-" * (10 + 7 * 4)) + + for name, _isthing in metrics: + print("{:10s}| {:5.1f} {:5.1f} {:5.1f} {:5d}".format( + name, + 100 * results[name]['pq'], + 100 * results[name]['sq'], + 100 * results[name]['rq'], + results[name]['n']) + ) + + t_delta = time.time() - start_time + print("Time elapsed: {:0.2f} seconds".format(t_delta)) + + return results + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--gt_json_file', type=str, + help="JSON file with 
ground truth data") + parser.add_argument('--pred_json_file', type=str, + help="JSON file with predictions data") + parser.add_argument('--gt_folder', type=str, default=None, + help="Folder with ground turth COCO format segmentations. \ + Default: X if the corresponding json file is X.json") + parser.add_argument('--pred_folder', type=str, default=None, + help="Folder with prediction COCO format segmentations. \ + Default: X if the corresponding json file is X.json") + args = parser.parse_args() + pq_compute(args.gt_json_file, args.pred_json_file, args.gt_folder, args.pred_folder) \ No newline at end of file diff --git a/mask_adapter/mask_adapter.py b/mask_adapter/mask_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..bfd413c481b60bb7bd66e47a1cb937079c8c14dd --- /dev/null +++ b/mask_adapter/mask_adapter.py @@ -0,0 +1,740 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/maskformer_model.py +""" +from typing import Tuple +import os +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F +from torchvision import transforms as T +from detectron2.config import configurable +from detectron2.data import MetadataCatalog +from detectron2.modeling import META_ARCH_REGISTRY, build_backbone +from detectron2.modeling.backbone import Backbone +from detectron2.modeling.postprocessing import sem_seg_postprocess +from detectron2.structures import Boxes, ImageList, Instances, BitMasks +from detectron2.utils.memory import retry_if_cuda_oom +from .modeling.maft.content_dependent_transfer import ContentDependentTransfer +from .modeling.meta_arch.mask_adapter_head import build_mask_adapter + + + + +VILD_PROMPT = [ + "a photo of a {}.", + "This is a photo of a {}", + "There is a {} in the scene", + "There is the {} in the scene", + "a photo of a {} in the scene", + "a photo of a small {}.", + "a photo of a medium {}.", + "a photo of a large {}.", + "This is a photo of a small {}.", + "This is a photo of a medium {}.", + "This is a photo of a large {}.", + "There is a small {} in the scene.", + "There is a medium {} in the scene.", + "There is a large {} in the scene.", +] + +@META_ARCH_REGISTRY.register() +class MASK_Adapter(nn.Module): + """ + Main class for mask classification semantic segmentation architectures. 
+ """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + mask_adapter: nn.Module, + weight_dict, + num_queries: int, + object_mask_threshold: float, + overlap_threshold: float, + mask_threshold: float, + train_metadata, + test_metadata, + size_divisibility: int, + sem_seg_postprocess_before_inference: bool, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + # inference + semantic_on: bool, + panoptic_on: bool, + instance_on: bool, + test_topk_per_image: int, + train_maft : bool, + num_output_maps: int, + ): + """ + Args: + backbone: a backbone module, must follow detectron2's backbone interface + mask_adapter: mask-adapter extract semantic activation maps from masks + weight_dict: dict contains weight for each loss + num_queries: int, number of queries + object_mask_threshold: float, threshold to filter query based on classification score + for panoptic segmentation inference + overlap_threshold: overlap threshold used in general inference for panoptic segmentation + metadata: dataset meta, get `thing` and `stuff` category names for panoptic + segmentation inference + size_divisibility: Some backbones require the input height and width to be divisible by a + specific integer. We can use this to override such requirement. + sem_seg_postprocess_before_inference: whether to resize the prediction back + to original input size before semantic segmentation inference or after. + For high-resolution dataset like Mapillary, resizing predictions before + inference will cause OOM error. + pixel_mean, pixel_std: list or tuple with #channels element, representing + the per-channel mean and std to be used to normalize the input image + semantic_on: bool, whether to output semantic segmentation prediction + instance_on: bool, whether to output instance segmentation prediction + panoptic_on: bool, whether to output panoptic segmentation prediction + test_topk_per_image: int, instance segmentation parameter, keep topk instances per image + """ + super().__init__() + self.backbone = backbone + self.mask_adapter = mask_adapter + self.weight_dict = weight_dict + self.num_queries = num_queries + self.overlap_threshold = overlap_threshold + self.object_mask_threshold = object_mask_threshold + self.mask_threshold = mask_threshold + self.train_metadata = train_metadata + self.test_metadata = test_metadata + if size_divisibility < 0: + # use backbone size_divisibility if not set + size_divisibility = self.backbone.size_divisibility + self.size_divisibility = size_divisibility + self.sem_seg_postprocess_before_inference = sem_seg_postprocess_before_inference + self.register_buffer("pixel_mean", torch.Tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1), False) + + # additional args + self.semantic_on = semantic_on + self.instance_on = instance_on + self.panoptic_on = panoptic_on + self.test_topk_per_image = test_topk_per_image + + if not self.semantic_on: + assert self.sem_seg_postprocess_before_inference + + self.void_embedding = nn.Embedding(1, backbone.dim_latent) + self.train_dataname = None + self.test_dataname = None + self.train_num_templates = {} + self.train_text_classifier = {} + self.train_maft = train_maft + self.num_output_maps = num_output_maps + + if self.train_maft: + if '_base' in backbone.model_name.lower(): + cdt_params = [640, 8] + elif '_large' in backbone.model_name.lower(): + cdt_params = [768, 8] + self.cdt = ContentDependentTransfer(d_model = cdt_params[0], nhead = cdt_params[1], panoptic_on = 
panoptic_on) + self.freeze_cdt() + + def freeze_cdt(self): + for param in self.cdt.parameters(): + param.requires_grad = False + + #https://github.com/bytedance/fc-clip/blob/2b0bbe213070d44da9182530fa2e826fef03f974/fcclip/fcclip.py#L139 + def prepare_class_names_from_metadata(self, metadata, train_metadata): + def split_labels(x): + res = [] + for x_ in x: + x_ = x_.replace(', ', ',') + x_ = x_.split(',') # there can be multiple synonyms for single class + res.append(x_) + return res + # get text classifier + try: + class_names = split_labels(metadata.stuff_classes) # it includes both thing and stuff + train_class_names = split_labels(train_metadata.stuff_classes) + except: + # this could be for insseg, where only thing_classes are available + class_names = split_labels(metadata.thing_classes) + train_class_names = split_labels(train_metadata.thing_classes) + train_class_names = {l for label in train_class_names for l in label} + category_overlapping_list = [] + for test_class_names in class_names: + is_overlapping = not set(train_class_names).isdisjoint(set(test_class_names)) + category_overlapping_list.append(is_overlapping) + category_overlapping_mask = torch.tensor( + category_overlapping_list, dtype=torch.long) + + def fill_all_templates_ensemble(x_=''): + res = [] + for x in x_: + for template in VILD_PROMPT: + res.append(template.format(x)) + return res, len(res) // len(VILD_PROMPT) + + num_templates = [] + templated_class_names = [] + for x in class_names: + templated_classes, templated_classes_num = fill_all_templates_ensemble(x) + templated_class_names += templated_classes + num_templates.append(templated_classes_num) # how many templates for current classes + class_names = templated_class_names + #print("text for classification:", class_names) + return category_overlapping_mask, num_templates, class_names + + def set_metadata(self, metadata): + self.test_metadata = metadata + self.category_overlapping_mask, self.test_num_templates, self.test_class_names = self.prepare_class_names_from_metadata(metadata, self.train_metadata) + self.test_text_classifier = None + return + + def get_text_classifier(self, dataname): + + if self.training: + os.makedirs("text_embedding", exist_ok=True) + out_path = f"./text_embedding/{dataname}_text_embedding.npy" + if dataname in self.train_text_classifier: + return self.train_text_classifier[dataname], self.train_num_templates[dataname] + + if dataname not in self.train_num_templates: + _, self.train_num_templates[dataname], train_class_names = self.prepare_class_names_from_metadata( + self.train_metadata[dataname], self.train_metadata[dataname] + ) + + if os.path.exists(out_path): + text_classifier = torch.from_numpy(np.load(out_path)).to(self.device) + else: + text_classifier = [] + bs = 128 + + for idx in range(0, len(train_class_names), bs): + text_classifier.append( + self.backbone.get_text_classifier(train_class_names[idx:idx+bs], self.device).detach() + ) + text_classifier = torch.cat(text_classifier, dim=0) + + text_classifier /= text_classifier.norm(dim=-1, keepdim=True) + text_classifier = text_classifier.reshape(text_classifier.shape[0] // len(VILD_PROMPT), len(VILD_PROMPT), text_classifier.shape[-1]).mean(1) + text_classifier /= text_classifier.norm(dim=-1, keepdim=True) + + np.save(out_path, text_classifier.cpu().numpy()) + + self.train_text_classifier[dataname] = text_classifier + return self.train_text_classifier[dataname], self.train_num_templates[dataname] + else: + if self.test_dataname != dataname: + self.category_overlapping_mask, 
self.test_num_templates, self.test_class_names = self.prepare_class_names_from_metadata( + self.test_metadata[dataname], self.test_metadata[dataname] + ) + text_classifier = [] + bs = 128 + for idx in range(0, len(self.test_class_names), bs): + text_classifier.append( + self.backbone.get_text_classifier(self.test_class_names[idx:idx+bs], self.device).detach() + ) + text_classifier = torch.cat(text_classifier, dim=0) + + text_classifier /= text_classifier.norm(dim=-1, keepdim=True) + text_classifier = text_classifier.reshape(text_classifier.shape[0] // len(VILD_PROMPT), len(VILD_PROMPT), text_classifier.shape[-1]).mean(1) + text_classifier /= text_classifier.norm(dim=-1, keepdim=True) + self.test_text_classifier = text_classifier + self.test_dataname = dataname + + return self.test_text_classifier, self.test_num_templates + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + mask_adapter = build_mask_adapter(cfg, cfg.MODEL.MASK_ADAPTER.NAME) + + # loss weights + class_weight = cfg.MODEL.MASK_FORMER.CLASS_WEIGHT + + # building criterion + weight_dict = {"loss_ce": class_weight} + + losses = ["labels"] + + train_metadata = {i: MetadataCatalog.get(i) for i in cfg.DATASETS.TRAIN} + test_metadata = {i: MetadataCatalog.get(i) for i in cfg.DATASETS.TEST} + + return { + "backbone": backbone, + "mask_adapter": mask_adapter, + "weight_dict": weight_dict, + "num_queries": cfg.MODEL.MASK_FORMER.NUM_OBJECT_QUERIES, + "object_mask_threshold": cfg.MODEL.MASK_FORMER.TEST.OBJECT_MASK_THRESHOLD, + "overlap_threshold": cfg.MODEL.MASK_FORMER.TEST.OVERLAP_THRESHOLD, + "mask_threshold": cfg.MODEL.MASK_ADAPTER.MASK_THRESHOLD, + "train_metadata": train_metadata,#MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), + "test_metadata": test_metadata, # MetadataCatalog.get(cfg.DATASETS.TEST[0]), + "size_divisibility": cfg.MODEL.MASK_FORMER.SIZE_DIVISIBILITY, + "sem_seg_postprocess_before_inference": ( + cfg.MODEL.MASK_FORMER.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE + or cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON + or cfg.MODEL.MASK_FORMER.TEST.INSTANCE_ON + ), + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + # inference + "semantic_on": cfg.MODEL.MASK_FORMER.TEST.SEMANTIC_ON, + "instance_on": cfg.MODEL.MASK_FORMER.TEST.INSTANCE_ON, + "panoptic_on": cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON, + "test_topk_per_image": cfg.TEST.DETECTIONS_PER_IMAGE, + "train_maft": cfg.MODEL.MASK_ADAPTER.TRAIN_MAFT, + "num_output_maps": cfg.MODEL.MASK_ADAPTER.NUM_OUTPUT_MAPS + } + + @property + def device(self): + return self.pixel_mean.device + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper`. + Each item in the list contains the inputs for one image. + For now, each item in the list is a dict that contains: + * "image": Tensor, image in (C, H, W) format. + * "instances": per-region ground truth + * Other information that's included in the original dicts, such as: + "height", "width" (int): the output resolution of the model (may be different + from input resolution), used in inference. + Returns: + list[dict]: + each dict has the results for one image. The dict contains the following keys: + + * "sem_seg": + A Tensor that represents the + per-pixel segmentation prediced by the head. + The prediction has shape KxHxW that represents the logits of + each class for each pixel. + * "panoptic_seg": + A tuple that represent panoptic output + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each segment. 
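To make the "sem_seg" output described above concrete, here is a toy-shaped sketch (not part of the diff) of how per-mask class scores and masks are combined into a K x H x W map; it mirrors semantic_inference further down in this file:

import torch
import torch.nn.functional as F

Q, K, H, W = 4, 3, 16, 16                        # masks, classes, spatial size (toy values)
mask_cls = torch.randn(Q, K + 1)                 # per-mask logits over K classes + void
mask_pred = torch.rand(Q, H, W)                  # per-mask soft masks

probs = F.softmax(mask_cls, dim=-1)[..., :-1]    # drop the void class
sem_seg = torch.einsum("qc,qhw->chw", probs, mask_pred)   # K x H x W per-pixel class scores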
+ segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". + """ + if self.train_maft and self.training : + dataname = "openvocab_coco_2017_train_stuff_sem_seg" + else: + dataname = batched_inputs[0]['dataname'] + if self.training: + dataname_2 = batched_inputs[1]['dataname'] + assert dataname == dataname_2, f"expect batch img from same dataset, but different from {dataname} and {dataname_2}" + + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.size_divisibility) + + features = self.backbone(images.tensor) + + clip_feature = features['clip_vis_dense'] + text_classifier, num_templates = self.get_text_classifier(dataname) + + text_classifier = torch.cat([text_classifier, F.normalize(self.void_embedding.weight, dim=-1)], dim=0) + + clip_vis_dense = self.visual_prediction_forward_convnext_2d(clip_feature) + + if self.train_maft: + #https://github.com/jiaosiyu1999/MAFT-Plus/blob/fd12806df651d309883229de9503e40533f92689/maft/maft_plus.py#L352 + #For maftp,it uses a wrong reshape operation to get clip_vis_dense. Since we don't finetune cdt, we follow them. + img_feat = self.visual_prediction_forward_convnext(clip_feature) + text_classifier = self.cdt(img_feat, text_classifier) + clip_vis_dense = img_feat + else: + clip_vis_dense = self.visual_prediction_forward_convnext_2d(clip_feature) + + if self.training: + # mask classification target + if "instances" in batched_inputs[0]: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + targets,masks,labels = self.prepare_targets(gt_instances, images) + else: + targets = None + + semantic_activation_maps = self.mask_adapter(clip_vis_dense, masks) + + maps_for_pooling = F.interpolate(semantic_activation_maps, size=clip_feature.shape[-2:], + mode='bilinear', align_corners=False) + if "convnext" in self.backbone.model_name.lower(): + B, C = clip_feature.size(0),clip_feature.size(1) + N = maps_for_pooling.size(1) + num_instances = N // self.num_output_maps + maps_for_pooling = F.softmax(F.logsigmoid(maps_for_pooling).view(B, N,-1), dim=-1) + pooled_clip_feature = torch.bmm(maps_for_pooling, clip_feature.view(B, C, -1).permute(0, 2, 1)) + pooled_clip_feature = self.backbone.visual_prediction_forward(pooled_clip_feature) + pooled_clip_feature = (pooled_clip_feature.reshape(B,num_instances, self.num_output_maps, -1).mean(dim=-2).contiguous()) + else: + raise NotImplementedError + + mask_cls_results = get_classification_logits(pooled_clip_feature, text_classifier, self.backbone.clip_model.logit_scale, num_templates) + + losses = self.cross_entropy_loss(mask_cls_results, labels) + + for k in list(losses.keys()): + if k in self.weight_dict: + losses[k] *= self.weight_dict[k] + else: + # remove this loss if not specified in `weight_dict` + losses.pop(k) + return losses + else: + masks = [] + classes = [] + for input_per_image in batched_inputs: + height = input_per_image.get("height") + width = input_per_image.get("width") + sem_seg = input_per_image["sem_seg"].to(self.device) + total_masks,class_label = self.sem_seg_2_gt_masks(sem_seg, height, width) + masks.append(total_masks) + classes.append(class_label) + masks = torch.stack(masks) + classes = torch.stack(classes) + + outputs = self.mask_adapter(clip_vis_dense, masks) + + maps_for_pooling = F.interpolate(outputs, size=clip_vis_dense.shape[-2:], + mode='bilinear', align_corners=False) + if 
"convnext" in self.backbone.model_name.lower(): + B,C = clip_feature.size(0),clip_feature.size(1) + N = maps_for_pooling.size(1) + num_instances = N // self.num_output_maps + maps_for_pooling = F.softmax(F.logsigmoid(maps_for_pooling).view(B, N,-1), dim=-1) + pooled_clip_feature = torch.bmm(maps_for_pooling, clip_feature.view(B, C, -1).permute(0, 2, 1)) + pooled_clip_feature = self.backbone.visual_prediction_forward(pooled_clip_feature) + pooled_clip_feature = (pooled_clip_feature.reshape(B,num_instances, self.num_output_maps, -1).mean(dim=-2).contiguous()) + else: + raise NotImplementedError + + mask_cls_results = get_classification_logits(pooled_clip_feature, text_classifier, self.backbone.clip_model.logit_scale, num_templates) + + mask_cls_results = mask_cls_results.softmax(-1) + + #upsample masks + mask_pred_results = F.interpolate( + masks, + size=(images.tensor.shape[-2], images.tensor.shape[-1]), + mode="bilinear", + align_corners=False, + ) + + processed_results = [] + for mask_cls_result, mask_pred_result, input_per_image, image_size in zip( + mask_cls_results, mask_pred_results, batched_inputs, images.image_sizes + ): + + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + processed_results.append({}) + + if self.sem_seg_postprocess_before_inference: + mask_pred_result = retry_if_cuda_oom(sem_seg_postprocess)( + mask_pred_result, image_size, height, width + ) + mask_cls_result = mask_cls_result.to(mask_pred_result) + + mask_pred_result = mask_pred_result.squeeze(1) + # semantic segmentation inference + if self.semantic_on: + r = retry_if_cuda_oom(self.semantic_inference)(mask_cls_result, mask_pred_result) + if not self.sem_seg_postprocess_before_inference: + r = retry_if_cuda_oom(sem_seg_postprocess)(r, image_size, height, width) + processed_results[-1]["sem_seg"] = r + + # panoptic segmentation inference + if self.panoptic_on: + panoptic_r = retry_if_cuda_oom(self.panoptic_inference)(mask_cls_result, mask_pred_result) + processed_results[-1]["panoptic_seg"] = panoptic_r + + # instance segmentation inference + if self.instance_on: + instance_r = retry_if_cuda_oom(self.instance_inference)(mask_cls_result, mask_pred_result) + processed_results[-1]["instances"] = instance_r + + return processed_results + + def sem_seg_2_gt_masks(self, sem_seg, height, width): + classes = torch.unique(sem_seg,sorted=False,return_inverse=False,return_counts=False) + gt_labels = classes[classes != 255] + masks = [sem_seg == class_id for class_id in gt_labels] + + if len(masks) == 0: + gt_masks = torch.zeros((0, sem_seg.shape[-2], + sem_seg.shape[-1])).to(sem_seg) + else: + gt_masks = torch.stack(masks).squeeze(1) + + num_masks = gt_masks.shape[0] + total_masks = torch.zeros((num_masks, gt_masks.shape[1], gt_masks.shape[2]), dtype=gt_masks.dtype, device=gt_masks.device) + labels = torch.zeros((num_masks), device=gt_masks.device) + + total_masks[:num_masks] = gt_masks[:num_masks] + labels[:num_masks] = gt_labels[:num_masks] + + return total_masks.float(), labels + + def visual_prediction_forward_convnext(self, x): + batch, channel, h, w = x.shape + + x = x.reshape(batch*h*w, channel).unsqueeze(-1).unsqueeze(-1) # fake 2D input + + x = self.backbone.clip_model.visual.trunk.head(x) + + x = self.backbone.clip_model.visual.head(x) + + return x.reshape(batch, h, w, x.shape[-1]).permute(0,3,1,2) + + def visual_prediction_forward_convnext_2d(self, x): + + clip_vis_dense = self.backbone.clip_model.visual.trunk.head.norm(x) + clip_vis_dense = 
self.backbone.clip_model.visual.trunk.head.drop(clip_vis_dense.permute(0, 2, 3, 1)) + clip_vis_dense = self.backbone.clip_model.visual.head(clip_vis_dense).permute(0, 3, 1, 2) + + return clip_vis_dense + + def cross_entropy_loss(self, mask_cls_results, labels): + + if torch.all(labels == -1): + loss_ce = mask_cls_results.sum() * 0.0 + else: + loss_ce = F.cross_entropy(mask_cls_results.transpose(1, 2), labels.to(torch.int64), ignore_index=-1) #remove celoss weight because of multiple datasets training + + losses = {"loss_ce": loss_ce} + return losses + + def prepare_targets(self, targets, images): + h_pad, w_pad = images.tensor.shape[-2:] + new_targets = [] + masks_list = [] + labels_list = [] + + num_masks = 32 + min_mask_area = 0 + + for targets_per_image in targets: + gt_masks = targets_per_image.gt_masks + if isinstance(gt_masks, BitMasks): + gt_masks = gt_masks.tensor + valid_mask_indices = [i for i, mask in enumerate(gt_masks) if mask.sum() > min_mask_area] + + if len(valid_mask_indices) > 0: + valid_gt_masks = gt_masks[valid_mask_indices] + valid_gt_classes = targets_per_image.gt_classes[valid_mask_indices] + + padded_masks = torch.zeros((valid_gt_masks.shape[0], h_pad, w_pad), dtype=valid_gt_masks.dtype, device=valid_gt_masks.device) + padded_masks[:, : valid_gt_masks.shape[1], : valid_gt_masks.shape[2]] = valid_gt_masks + new_targets.append( + { + "labels": valid_gt_classes, + "masks": padded_masks, + } + ) + + total_masks = torch.zeros((num_masks, h_pad, w_pad), dtype=gt_masks.dtype, device=gt_masks.device) + selected_labels = torch.zeros((num_masks), device=gt_masks.device) + + if valid_gt_masks.shape[0] > num_masks: + selected_indices = torch.randperm(valid_gt_masks.shape[0])[:num_masks] + for idx, mask_idx in enumerate(selected_indices): + total_masks[idx, :valid_gt_masks[mask_idx].shape[0], :valid_gt_masks[mask_idx].shape[1]] = valid_gt_masks[mask_idx] + selected_labels[idx] = valid_gt_classes[mask_idx] + else: + for idx in range(valid_gt_masks.shape[0]): + total_masks[idx, :valid_gt_masks[idx].shape[0], :valid_gt_masks[idx].shape[1]] = valid_gt_masks[idx] + selected_labels[idx] = valid_gt_classes[idx] + + for idx in range(valid_gt_masks.shape[0], num_masks): + total_masks[idx] = torch.zeros((h_pad, w_pad), dtype=gt_masks.dtype, device=gt_masks.device) + selected_labels[idx] = -1 + else: + total_masks = torch.zeros((num_masks, h_pad, w_pad), dtype=gt_masks.dtype, device=gt_masks.device) + selected_labels = torch.zeros((num_masks), device=gt_masks.device) + selected_labels.fill_(-1) + + padded_masks = torch.zeros((0, h_pad, w_pad), dtype=gt_masks.dtype, device=gt_masks.device) + valid_gt_classes = torch.zeros((0), device=gt_masks.device) + new_targets.append( + { + "labels": valid_gt_classes, + "masks": padded_masks, + } + ) + + masks_list.append(total_masks) + labels_list.append(selected_labels) + + masks = torch.stack(masks_list, dim=0) + labels = torch.stack(labels_list, dim=0) + labels = labels.long() + + return new_targets, masks, labels + + def semantic_inference(self, mask_cls, mask_pred): + + mask_cls = F.softmax(mask_cls, dim=-1)[..., :-1] + if mask_pred.dim() == 4: + mask_pred = mask_pred.squeeze(dim=0) + #mask_pred = mask_pred.sigmoid() #remove because of gt masks + semseg = torch.einsum("qc,qhw->chw", mask_cls, mask_pred) + return semseg + + def panoptic_inference(self, mask_cls, mask_pred): + + + scores, labels = F.softmax(mask_cls, dim=-1).max(-1) + num_classes = len(self.test_metadata[self.test_dataname].stuff_classes) + keep = labels.ne(num_classes) & (scores > 
self.object_mask_threshold) + cur_scores = scores[keep] + cur_classes = labels[keep] + cur_masks = mask_pred[keep] + cur_mask_cls = mask_cls[keep] + cur_mask_cls = cur_mask_cls[:, :-1] + + cur_prob_masks = cur_scores.view(-1, 1, 1) * cur_masks + + h, w = cur_masks.shape[-2:] + panoptic_seg = torch.zeros((h, w), dtype=torch.int32, device=cur_masks.device) + segments_info = [] + + current_segment_id = 0 + + if cur_masks.shape[0] == 0: + # We didn't detect any mask :( + return panoptic_seg, segments_info + else: + # take argmax + cur_mask_ids = cur_prob_masks.argmax(0) + stuff_memory_list = {} + for k in range(cur_classes.shape[0]): + pred_class = cur_classes[k].item() + isthing = pred_class in self.test_metadata[self.test_dataname].thing_dataset_id_to_contiguous_id.values() + mask_area = (cur_mask_ids == k).sum().item() + original_area = (cur_masks[k] >= 0.5).sum().item() + mask = (cur_mask_ids == k) & (cur_masks[k] >= 0.5) + + if mask_area > 0 and original_area > 0 and mask.sum().item() > 0: + if mask_area / original_area < self.overlap_threshold: + continue + + # merge stuff regions + if not isthing: + if int(pred_class) in stuff_memory_list.keys(): + panoptic_seg[mask] = stuff_memory_list[int(pred_class)] + continue + else: + stuff_memory_list[int(pred_class)] = current_segment_id + 1 + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + + segments_info.append( + { + "id": current_segment_id, + "isthing": bool(isthing), + "category_id": int(pred_class), + } + ) + + return panoptic_seg, segments_info + + def instance_inference(self, mask_cls, mask_pred): + # mask_pred is already processed to have the same shape as original input + + image_size = mask_pred.shape[-2:] + + # [Q, K] + #scores = F.softmax(mask_cls, dim=-1)[:, :-1] #[250,150] + scores = mask_cls[:, :-1].sigmoid() + # if this is panoptic segmentation + if self.panoptic_on: + num_classes = len(self.test_metadata[self.test_dataname].stuff_classes) + else: + num_classes = len(self.test_metadata[self.test_dataname].thing_classes) + labels = torch.arange(num_classes, device=self.device).unsqueeze(0).repeat(self.num_queries, 1).flatten(0, 1) + # scores_per_image, topk_indices = scores.flatten(0, 1).topk(self.num_queries, sorted=False) + scores_per_image, topk_indices = scores.flatten(0, 1).topk(self.test_topk_per_image, sorted=False) + labels_per_image = labels[topk_indices] + + topk_indices = topk_indices // num_classes + # mask_pred = mask_pred.unsqueeze(1).repeat(1, self.sem_seg_head.num_classes, 1).flatten(0, 1) + mask_pred = mask_pred[topk_indices] + + # if this is panoptic segmentation, we only keep the "thing" classes + if self.panoptic_on: + keep = torch.zeros_like(scores_per_image).bool() + for i, lab in enumerate(labels_per_image): + keep[i] = lab in self.test_metadata[self.test_dataname].thing_dataset_id_to_contiguous_id.values() + + scores_per_image = scores_per_image[keep] + labels_per_image = labels_per_image[keep] + mask_pred = mask_pred[keep] + + result = Instances(image_size) + # mask (before sigmoid) + result.pred_masks = (mask_pred > self.mask_threshold).float() + result.pred_boxes = Boxes(torch.zeros(mask_pred.size(0), 4)) + # Uncomment the following to get boxes from masks (this is slow) + # result.pred_boxes = BitMasks(mask_pred > 0).get_bounding_boxes() + + # calculate average mask prob + mask_scores_per_image = (mask_pred.flatten(1) * result.pred_masks.flatten(1)).sum(1) / (result.pred_masks.flatten(1).sum(1) + 1e-6) + result.scores = scores_per_image * mask_scores_per_image + 
result.pred_classes = labels_per_image + return result + +class MaskPooling(nn.Module): + def __init__( + self,mask_threshold + ): + super().__init__() + self.mask_threshold = mask_threshold + + def forward(self, x, mask): + """ + Args: + x: [B, C, H, W] + mask: [B, Q, H, W] + """ + if not x.shape[-2:] == mask.shape[-2:]: + # reshape mask to x + mask = F.interpolate(mask, size=x.shape[-2:], mode='bilinear', align_corners=False) + with torch.no_grad(): + mask = mask.detach() + binary_mask = (mask > self.mask_threshold).to(mask.dtype) + mask = binary_mask * mask + denorm = mask.sum(dim=(-1, -2), keepdim=True) + 1e-8 + + mask_pooled_x = torch.einsum( + "bchw,bqhw->bqc", + x, + mask / denorm, + ) + return mask_pooled_x + +def get_classification_logits(x, text_classifier, logit_scale, num_templates=None): + # x in shape of [B, *, C] + # text_classifier in shape of [num_classes, C] + # logit_scale is a learnable scalar https://github.com/mlfoundations/open_clip/blob/main/src/open_clip/model.py#L201 + # return: [B, *, num_classes] + x = F.normalize(x, dim=-1) + logit_scale = torch.clamp(logit_scale.exp(), max=100) + if len(text_classifier.shape) == 2: + pred_logits = logit_scale * x @ text_classifier.T # B, *, N + 1 + else: + pred_logits = logit_scale * x @ text_classifier.permute(0,2,1) # B, *, N + 1 + # max ensembel as in OpenSeg/ODISE + if pred_logits.shape[2] != 1204 and pred_logits.shape[2] != 366: + final_pred_logits = [] + cur_idx = 0 + for num_t in num_templates: + final_pred_logits.append(pred_logits[:, :, cur_idx: cur_idx + num_t].max(-1).values) + cur_idx += num_t + final_pred_logits.append(pred_logits[:, :, -1]) # the last classifier is for void + final_pred_logits = torch.stack(final_pred_logits, dim=-1) + else: + final_pred_logits = pred_logits + return final_pred_logits \ No newline at end of file diff --git a/mask_adapter/modeling/.DS_Store b/mask_adapter/modeling/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..b81552e861cc18d038dcaee13c31803c6c6b0504 Binary files /dev/null and b/mask_adapter/modeling/.DS_Store differ diff --git a/mask_adapter/modeling/__init__.py b/mask_adapter/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..08f0fe6acef28ac21adad27c792bf3c74fec12d7 --- /dev/null +++ b/mask_adapter/modeling/__init__.py @@ -0,0 +1,17 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
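A short usage sketch (not part of the diff) showing how MaskPooling and get_classification_logits defined above fit together; all tensors are random stand-ins, and it assumes the mask_adapter package from this diff is importable.

import torch
import torch.nn.functional as F
from mask_adapter.mask_adapter import MaskPooling, get_classification_logits

B, C, H, W, Q, N = 2, 768, 32, 32, 5, 10          # batch, channels, spatial, masks, classes
features = torch.randn(B, C, H, W)                # dense CLIP features (stand-in)
masks = torch.rand(B, Q, H, W)                    # soft masks in [0, 1]
text_classifier = F.normalize(torch.randn(N + 1, C), dim=-1)   # N classes + one void embedding
logit_scale = torch.tensor(4.6052)                # log(100), as in CLIP

pooled = MaskPooling(mask_threshold=0.5)(features, masks)      # B x Q x C mask-averaged features
logits = get_classification_logits(pooled, text_classifier, logit_scale, num_templates=[1] * N)
print(logits.shape)                               # torch.Size([2, 5, 11]) -> N classes + void per mask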
+""" +from .backbone.clip import CLIP +from .meta_arch.mask_adapter_head import MASKAdapterHead diff --git a/mask_adapter/modeling/__pycache__/__init__.cpython-310.pyc b/mask_adapter/modeling/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e7bda800cd30b89d65d0acff682b20a1ddce640 Binary files /dev/null and b/mask_adapter/modeling/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/modeling/__pycache__/__init__.cpython-38.pyc b/mask_adapter/modeling/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a639188d1e53edf84f26ab6db0dfe14cdf0ff73 Binary files /dev/null and b/mask_adapter/modeling/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/modeling/__pycache__/criterion.cpython-310.pyc b/mask_adapter/modeling/__pycache__/criterion.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..764e0422d792ce21d0ce83299f31c957095b3382 Binary files /dev/null and b/mask_adapter/modeling/__pycache__/criterion.cpython-310.pyc differ diff --git a/mask_adapter/modeling/__pycache__/criterion.cpython-38.pyc b/mask_adapter/modeling/__pycache__/criterion.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8cc4e9b7fd15e4781c82744c35fd9d41169c8623 Binary files /dev/null and b/mask_adapter/modeling/__pycache__/criterion.cpython-38.pyc differ diff --git a/mask_adapter/modeling/__pycache__/matcher.cpython-310.pyc b/mask_adapter/modeling/__pycache__/matcher.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4fe1f05ec17b0a960a22156c0d1e5defda7aaa2d Binary files /dev/null and b/mask_adapter/modeling/__pycache__/matcher.cpython-310.pyc differ diff --git a/mask_adapter/modeling/__pycache__/matcher.cpython-38.pyc b/mask_adapter/modeling/__pycache__/matcher.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d5371cd939e2e8d5938c956b4893a6c0695d22bb Binary files /dev/null and b/mask_adapter/modeling/__pycache__/matcher.cpython-38.pyc differ diff --git a/mask_adapter/modeling/backbone/__init__.py b/mask_adapter/modeling/backbone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be75f0cd9568f901b3174ecfb43c0b9f4fa1f77d --- /dev/null +++ b/mask_adapter/modeling/backbone/__init__.py @@ -0,0 +1,15 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" \ No newline at end of file diff --git a/mask_adapter/modeling/backbone/__pycache__/__init__.cpython-310.pyc b/mask_adapter/modeling/backbone/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e71f344d63bf658f6e998a268ee6b8dff5ebf557 Binary files /dev/null and b/mask_adapter/modeling/backbone/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/modeling/backbone/__pycache__/__init__.cpython-38.pyc b/mask_adapter/modeling/backbone/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e1308e37150648316c3dcb567f8dda356c8c1bcd Binary files /dev/null and b/mask_adapter/modeling/backbone/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/modeling/backbone/__pycache__/clip.cpython-310.pyc b/mask_adapter/modeling/backbone/__pycache__/clip.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab6a2ee965d0b11b9428762a6571db7bf433f63f Binary files /dev/null and b/mask_adapter/modeling/backbone/__pycache__/clip.cpython-310.pyc differ diff --git a/mask_adapter/modeling/backbone/__pycache__/clip.cpython-38.pyc b/mask_adapter/modeling/backbone/__pycache__/clip.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6d9d76abb34dd3073afe391dbe5c2884f34dd40 Binary files /dev/null and b/mask_adapter/modeling/backbone/__pycache__/clip.cpython-38.pyc differ diff --git a/mask_adapter/modeling/backbone/__pycache__/simple_tokenizer.cpython-310.pyc b/mask_adapter/modeling/backbone/__pycache__/simple_tokenizer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..945327f04f86ad629779597e1d969be94c70bf5e Binary files /dev/null and b/mask_adapter/modeling/backbone/__pycache__/simple_tokenizer.cpython-310.pyc differ diff --git a/mask_adapter/modeling/backbone/__pycache__/simple_tokenizer.cpython-38.pyc b/mask_adapter/modeling/backbone/__pycache__/simple_tokenizer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44754fd189a3c20c61b86aec0d82f68e0f28e7c4 Binary files /dev/null and b/mask_adapter/modeling/backbone/__pycache__/simple_tokenizer.cpython-38.pyc differ diff --git a/mask_adapter/modeling/backbone/clip.py b/mask_adapter/modeling/backbone/clip.py new file mode 100644 index 0000000000000000000000000000000000000000..6937174e6ffd5dc4772ce79ea64ef164d460c3d7 --- /dev/null +++ b/mask_adapter/modeling/backbone/clip.py @@ -0,0 +1,233 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import torch +import torch.nn.functional as F +import math +from detectron2.utils import comm + +import open_clip + +from detectron2.modeling import BACKBONE_REGISTRY, Backbone, ShapeSpec + +@BACKBONE_REGISTRY.register() +class CLIP(Backbone): + def __init__(self, cfg, input_shape): + super().__init__() + model_name = cfg.MODEL.FC_CLIP.CLIP_MODEL_NAME + pretrained= cfg.MODEL.FC_CLIP.CLIP_PRETRAINED_WEIGHTS + # download on local rank 0 first + if comm.get_local_rank() == 0: + open_clip.create_model_and_transforms(model_name, pretrained=pretrained) + comm.synchronize() + + self.model_name = model_name + self.pretrained = pretrained + + self.clip_model, _, _ = open_clip.create_model_and_transforms(model_name, pretrained=pretrained) + self.text_tokenizer = open_clip.get_tokenizer(model_name) + + model_name = model_name.lower() + if 'convnext_' in model_name: + self.model_type = 'convnext' + if '_base' in model_name: + self.output_channels = [128, 128, 256, 512, 1024] + elif '_large' in model_name: + self.output_channels = [192, 192, 384, 768, 1536] + elif '_xxlarge' in model_name: + self.output_channels = [384, 384, 768, 1536, 3072] + + elif 'rn' in model_name: + self.model_type = 'resnet' + if model_name.replace('-quickgelu', '') in ['rn50', 'rn101']: + self.output_channels = [64, 256, 512, 1024, 2048] + elif model_name == 'rn50x4': + self.output_channels = [80, 320, 640, 1280, 2560] + elif model_name == 'rn50x16': + self.output_channels = [96, 384, 768, 1536, 3072] + elif model_name == 'rn50x64': + self.output_channels = [128, 512, 1024, 2048, 4096] + + self._out_feature_strides = { + "stem": 2, + "res2": 4, + "res3": 8, + "res4": 16, + "res5": 32, + "clip_embedding": -1 + } + self._out_feature_channels = { + "stem": self.output_channels[0], + "res2": self.output_channels[1], + "res3": self.output_channels[2], + "res4": self.output_channels[3], + "res5": self.output_channels[4], + "clip_embedding": self.dim_latent + } + + self.eval() + self.freeze_everything() + + def freeze_everything(self): + for param in self.clip_model.parameters(): + param.requires_grad = False + + def encode_text(self, text, normalize: bool = False): + cast_dtype = self.clip_model.transformer.get_cast_dtype() + + x = self.clip_model.token_embedding(text).to(cast_dtype) # [batch_size, n_ctx, d_model] + + x = x + self.clip_model.positional_embedding.to(cast_dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.clip_model.transformer(x, attn_mask=self.clip_model.attn_mask) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.clip_model.ln_final(x) # [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), text.argmax(dim=-1)] @ self.clip_model.text_projection + return F.normalize(x, dim=-1) if normalize else x + + def tokenize_text(self, text): + return self.text_tokenizer(text) + + def extract_features(self, x): + return { + 'convnext': self.extract_features_convnext, + 'resnet': self.extract_features_resnet, + }[self.model_type](x) + + def visual_prediction_forward(self, x, masks=None): + return { + 'convnext': self.visual_prediction_forward_convnext, + 'resnet': self.visual_prediction_forward_resnet, + }[self.model_type](x, masks) + + def extract_features_convnext(self, x): + out = {} + x = self.clip_model.visual.trunk.stem(x) + out['stem'] = x.contiguous() # os4 + for i in range(4): + x = self.clip_model.visual.trunk.stages[i](x) + out[f'res{i+2}'] = x.contiguous() # res 2 (os4), 3 (os8), 4 (os16), 5 
(os32)
+
+        x = self.clip_model.visual.trunk.norm_pre(x)
+        out['clip_vis_dense'] = x.contiguous()
+        return out
+
+    def extract_features_resnet(self, x):
+        out = {}
+        x = self.clip_model.visual.act1(self.clip_model.visual.bn1(self.clip_model.visual.conv1(x)))
+        x = self.clip_model.visual.act2(self.clip_model.visual.bn2(self.clip_model.visual.conv2(x)))
+        x = self.clip_model.visual.act3(self.clip_model.visual.bn3(self.clip_model.visual.conv3(x)))
+        out['stem'] = x.contiguous() # os2
+        x = self.clip_model.visual.avgpool(x)
+        x = self.clip_model.visual.layer1(x)
+        out['res2'] = x.contiguous() # os4
+        x = self.clip_model.visual.layer2(x)
+        out['res3'] = x.contiguous() # os8
+        x = self.clip_model.visual.layer3(x)
+        out['res4'] = x.contiguous() # os16
+        x = self.clip_model.visual.layer4(x)
+        out['res5'] = x.contiguous() # os32
+        out['clip_vis_dense'] = x
+        return out
+
+    def visual_prediction_forward_convnext(self, x, masks):
+        batch, num_query, channel = x.shape
+        x = x.reshape(batch*num_query, channel, 1, 1) # fake 2D input
+        x = self.clip_model.visual.trunk.head(x)
+        x = self.clip_model.visual.head(x)
+        return x.view(batch, num_query, x.shape[-1]) # B x num_queries x 640
+
+    def visual_prediction_forward_resnet(self, x, masks):
+        batch, channel, height, width = x.shape
+        if masks.shape[-2] != height or masks.shape[-1] != width:
+            masks = F.interpolate(masks, size=(height, width), mode='bilinear', align_corners=False)
+        num_masks = masks.shape[1]
+
+        positional_embedding = self.clip_model.visual.attnpool.positional_embedding.to(x.dtype)
+        spatial_pos_embed = positional_embedding[1:, None, :]  # HW x 1 x C
+        orig_size = int(math.sqrt(spatial_pos_embed.shape[0]))
+        spatial_pos_embed = spatial_pos_embed.permute(1, 2, 0).reshape(1, channel, orig_size, orig_size)
+        spatial_pos_embed = F.interpolate(spatial_pos_embed, size=(height, width), mode='bilinear', align_corners=False) # 1 x C x H x W
+        spatial_pos_embed = spatial_pos_embed.permute(2, 3, 0, 1).reshape(height*width, 1, channel)
+        x = x.reshape(batch, channel, height * width).permute(2, 0, 1)  # BCHW -> (HW)BC
+        key_value = x + spatial_pos_embed
+
+        masks = masks.reshape(batch, num_masks, height * width)
+        masks = (masks > 0).to(masks.dtype)
+        query = x.mean(0, keepdim=True) + positional_embedding[:1, None, :]
+        query = query.repeat_interleave(num_masks, dim=0)
+
+        attn_mask = masks < 0.5
+        attn_mask = attn_mask.unsqueeze(1).expand(-1, self.clip_model.visual.attnpool.num_heads, -1, -1)
+        attn_mask = attn_mask.reshape(batch * self.clip_model.visual.attnpool.num_heads,
+                                      query.shape[0], key_value.shape[0])
+
+        x = F.multi_head_attention_forward(
+            query=query, key=key_value, value=key_value,
+            embed_dim_to_check=key_value.shape[-1],
+            num_heads=self.clip_model.visual.attnpool.num_heads,
+            q_proj_weight=self.clip_model.visual.attnpool.q_proj.weight,
+            k_proj_weight=self.clip_model.visual.attnpool.k_proj.weight,
+            v_proj_weight=self.clip_model.visual.attnpool.v_proj.weight,
+            in_proj_weight=None,
+            in_proj_bias=torch.cat([self.clip_model.visual.attnpool.q_proj.bias,
+                                    self.clip_model.visual.attnpool.k_proj.bias,
+                                    self.clip_model.visual.attnpool.v_proj.bias]),
+            bias_k=None,
+            bias_v=None,
+            add_zero_attn=False,
+            dropout_p=0.,
+            out_proj_weight=self.clip_model.visual.attnpool.c_proj.weight,
+            out_proj_bias=self.clip_model.visual.attnpool.c_proj.bias,
+            use_separate_proj_weight=True,
+            training=self.clip_model.visual.attnpool.training,
+            need_weights=False,
+            attn_mask=attn_mask
+        )[0].permute(1, 0, 2)  # B x N x C
+
+        return x
+
+    def get_text_classifier(self, text_list, device):
+        self.eval()
+        with torch.no_grad():
+            # reference for templates: https://github.com/mlfoundations/open_clip/blob/91f6cce16b7bee90b3b5d38ca305b5b3b67cc200/src/training/imagenet_zeroshot_data.py
+            text_tokens = self.tokenize_text(text_list)
+            text_tokens = text_tokens.to(device)
+            # we return the un-normalized text features.
+            text_features = self.encode_text(text_tokens, normalize=False)
+            return text_features
+
+    def forward(self, x):
+        self.eval()
+        with torch.no_grad():
+            return self.extract_features(x)
+
+    @property
+    def dim_latent(self):
+        return self.clip_model.text_projection.shape[-1]
+
+    def output_shape(self):
+        return {
+            name: ShapeSpec(
+                channels=self._out_feature_channels[name], stride=self._out_feature_strides[name]
+            )
+            for name in ["stem", "res2", "res3", "res4", "res5", "clip_embedding"]
+        }
+
+    @property
+    def size_divisibility(self):
+        return -1
\ No newline at end of file
diff --git a/mask_adapter/modeling/maft/__pycache__/content_dependent_transfer.cpython-310.pyc b/mask_adapter/modeling/maft/__pycache__/content_dependent_transfer.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..309e0ba9495b9b44e4c4bc460d608dbbc4a71e25
Binary files /dev/null and b/mask_adapter/modeling/maft/__pycache__/content_dependent_transfer.cpython-310.pyc differ
diff --git a/mask_adapter/modeling/maft/__pycache__/content_dependent_transfer.cpython-38.pyc b/mask_adapter/modeling/maft/__pycache__/content_dependent_transfer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ae8987e34f075453fbdd626aefa35885a9060b42
Binary files /dev/null and b/mask_adapter/modeling/maft/__pycache__/content_dependent_transfer.cpython-38.pyc differ
diff --git a/mask_adapter/modeling/maft/content_dependent_transfer.py b/mask_adapter/modeling/maft/content_dependent_transfer.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1d6d78b531349b3481bc891de9a94c84f0f0505
--- /dev/null
+++ b/mask_adapter/modeling/maft/content_dependent_transfer.py
@@ -0,0 +1,134 @@
+import math
+import torch
+from torch import nn, Tensor
+from torch.nn import functional as F
+from typing import Optional
+
+
+class ShortCut_CrossAttention(nn.Module):
+
+    def __init__(self, d_model, nhead, panoptic_on=False):
+        super().__init__()
+        self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=0.0)
+        self.norm = nn.LayerNorm(d_model)
+        self.activation = F.relu
+
+        self._reset_parameters()
+
+        self.MLP = nn.Linear(d_model, d_model)
+        self.panoptic_on = panoptic_on
+        if panoptic_on:
+            nn.init.constant_(self.MLP.weight, 0.0)
+            nn.init.constant_(self.MLP.bias, 0.0)
+
+    def _reset_parameters(self):
+        for p in self.parameters():
+            if p.dim() > 1:
+                nn.init.xavier_uniform_(p)
+
+    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
+        return tensor if pos is None else tensor + pos
+
+    def forward(self, tgt, memory,
+                memory_mask: Optional[Tensor] = None,
+                memory_key_padding_mask: Optional[Tensor] = None,
+                pos: Optional[Tensor] = None,
+                query_pos: Optional[Tensor] = None):
+        tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt, query_pos),
+                                   key=self.with_pos_embed(memory, pos),
+                                   value=memory, attn_mask=memory_mask,
+                                   key_padding_mask=memory_key_padding_mask)[0]
+
+        if self.panoptic_on:
+            tgt = tgt + self.norm(self.MLP(tgt2))
+        else:
+            tgt = self.norm(tgt + self.MLP(tgt2))
+
+        return tgt
+
+
+class ContentDependentTransfer(nn.Module):
+
+    def __init__(self, d_model, nhead, panoptic_on):
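+        # ContentDependentTransfer (from MAFT) cross-attends the text-classifier embeddings (queries)
+        # against the image features (keys/values) to produce a per-image bias that adapts the
+        # classifier to the current image; see forward() below.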
super().__init__() + self.pe_layer = PositionEmbeddingSine(d_model//2, normalize=True) + self.cross_atten = ShortCut_CrossAttention(d_model = d_model, nhead = nhead, panoptic_on = panoptic_on) + + def visual_prediction_forward_convnext(self, x): + batch, channel, h, w = x.shape + x = x.reshape(batch*h*w, channel).unsqueeze(-1).unsqueeze(-1) # fake 2D input + x = self.truck_head(x) + x = self.head(x) + return x.reshape(batch, h, w, x.shape[-1]).permute(0,3,1,2) # B x num_queries x 640 + + + def forward(self, img_feat, text_classifier, ): + text_classifier = text_classifier.unsqueeze(0).repeat(img_feat.shape[0],1,1) + + pos = self.pe_layer(img_feat, None).flatten(2).permute(2, 0, 1) # hw * b * c + img_feat = img_feat.flatten(2).permute(2, 0, 1) # hw * b * c + + bias = self.cross_atten(text_classifier.permute(1, 0, 2), img_feat, memory_mask=None, memory_key_padding_mask=None, pos=pos, query_pos=None) + + return bias.permute(1, 0, 2) + +class PositionEmbeddingSine(nn.Module): + """ + This is a more standard version of the position embedding, very similar to the one + used by the Attention is all you need paper, generalized to work on images. + """ + + def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): + super().__init__() + self.num_pos_feats = num_pos_feats + self.temperature = temperature + self.normalize = normalize + if scale is not None and normalize is False: + raise ValueError("normalize should be True if scale is passed") + if scale is None: + scale = 2 * math.pi + self.scale = scale + + def forward(self, x, mask=None): + if mask is None: + mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool) + not_mask = ~mask + y_embed = not_mask.cumsum(1, dtype=torch.float32) + x_embed = not_mask.cumsum(2, dtype=torch.float32) + if self.normalize: + eps = 1e-6 + y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale + x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale + + dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) + dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) + + pos_x = x_embed[:, :, :, None] / dim_t + pos_y = y_embed[:, :, :, None] / dim_t + pos_x = torch.stack( + (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 + ).flatten(3) + pos_y = torch.stack( + (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 + ).flatten(3) + pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) + return pos + + def __repr__(self, _repr_indent=4): + head = "Positional encoding " + self.__class__.__name__ + body = [ + "num_pos_feats: {}".format(self.num_pos_feats), + "temperature: {}".format(self.temperature), + "normalize: {}".format(self.normalize), + "scale: {}".format(self.scale), + ] + # _repr_indent = 4 + lines = [head] + [" " * _repr_indent + line for line in body] + return "\n".join(lines) diff --git a/mask_adapter/modeling/meta_arch/__init__.py b/mask_adapter/modeling/meta_arch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be75f0cd9568f901b3174ecfb43c0b9f4fa1f77d --- /dev/null +++ b/mask_adapter/modeling/meta_arch/__init__.py @@ -0,0 +1,15 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" \ No newline at end of file diff --git a/mask_adapter/modeling/meta_arch/__pycache__/__init__.cpython-310.pyc b/mask_adapter/modeling/meta_arch/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b1c1e2958458670202769a05bedbe09eeab51d5f Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/__init__.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8ea0ba67306d42c3429d096c0e9e5c55c1b4cc89 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/convnext.cpython-310.pyc b/mask_adapter/modeling/meta_arch/__pycache__/convnext.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0bfee6e15c8090944ece489a9d0485a7d7e3b9a8 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/convnext.cpython-310.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/convnext.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/convnext.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d9f4c488b1b17b2b0c4a37c7428e1d05c4c36e12 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/convnext.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_head.cpython-310.pyc b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_head.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21f45f98520dfffea76e747a652584dfeb95d8a2 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_head.cpython-310.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_head.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_head.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24b1b01b67b67a7b0b156637e44a83993f698609 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_head.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter.cpython-310.pyc b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e28266b30d8b7b2e9e3ee5ae2fd334df8a83dfa Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter.cpython-310.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..916e0ec19ebe2fd87f43711bd5071f837b371b27 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter_first.cpython-310.pyc 
b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter_first.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d52e3c03f74cecb6a6f2b2afbd3bd71fc36be102 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter_first.cpython-310.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter_first.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter_first.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a9a5f435547cb341ed6148d0547866eb7764b725 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter_first.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_sparse_head.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_sparse_head.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2867e48e495ffbc86612caded57a6938f7a1f749 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_sparse_head.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/mask_adapter_head.cpython-310.pyc b/mask_adapter/modeling/meta_arch/__pycache__/mask_adapter_head.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..491016fe5354345828ebd2b29fa7e6f9663476c6 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/mask_adapter_head.cpython-310.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/mask_adapter_head.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/mask_adapter_head.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df135cbecc87364de21d32a6bdc9fe81a78c31b9 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/mask_adapter_head.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/convnext.py b/mask_adapter/modeling/meta_arch/convnext.py new file mode 100644 index 0000000000000000000000000000000000000000..b7e6153ca9640dd8f455a54093fe07430db1cb01 --- /dev/null +++ b/mask_adapter/modeling/meta_arch/convnext.py @@ -0,0 +1,116 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from timm.models.layers import trunc_normal_, DropPath + + +class ConvNextV2Block(nn.Module): + """ ConvNeXtV2 Block. + + Args: + dim (int): Number of input channels. + drop_path (float): Stochastic depth rate. Default: 0.0 + """ + + def __init__(self, dim, drop_path=0.): + super().__init__() + self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers + self.act = nn.GELU() + self.grn = GRN(4 * dim) + self.pwconv2 = nn.Linear(4 * dim, dim) + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + + def forward(self, x): + input = x + x = self.dwconv(x) + x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.grn(x) + x = self.pwconv2(x) + x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) + + x = input + self.drop_path(x) + return x + +class GRN(nn.Module): + """ GRN (Global Response Normalization) layer + """ + def __init__(self, dim): + super().__init__() + self.gamma = nn.Parameter(torch.zeros(1, 1, 1, dim)) + self.beta = nn.Parameter(torch.zeros(1, 1, 1, dim)) + + def forward(self, x): + Gx = torch.norm(x, p=2, dim=(1,2), keepdim=True) + Nx = Gx / (Gx.mean(dim=-1, keepdim=True) + 1e-6) + return self.gamma * (x * Nx) + self.beta + x + +class ConvNextBlock(nn.Module): + r""" ConvNeXt Block. There are two equivalent implementations: + (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) + (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back + We use (2) as we find it slightly faster in PyTorch + + Args: + dim (int): Number of input channels. + drop_path (float): Stochastic depth rate. Default: 0.0 + layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. + """ + + def __init__(self, dim, kernel_size=7, drop_path=0., layer_scale_init_value=1e-6): + super().__init__() + self.dwconv = nn.Conv2d(dim, dim, kernel_size=kernel_size, padding=kernel_size//2, groups=dim) # depthwise conv + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers + self.act = nn.GELU() + self.pwconv2 = nn.Linear(4 * dim, dim) + self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), + requires_grad=True) if layer_scale_init_value > 0 else None + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + + def forward(self, x): + input = x + x = self.dwconv(x) + x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.pwconv2(x) + if self.gamma is not None: + x = self.gamma * x + x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) + + x = input + self.drop_path(x) + return x + + +class LayerNorm(nn.Module): + r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. + The ordering of the dimensions in the inputs. channels_last corresponds to inputs with + shape (batch_size, height, width, channels) while channels_first corresponds to inputs + with shape (batch_size, channels, height, width). 
+ """ + + def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): + super().__init__() + self.weight = nn.Parameter(torch.ones(normalized_shape)) + self.bias = nn.Parameter(torch.zeros(normalized_shape)) + self.eps = eps + self.data_format = data_format + if self.data_format not in ["channels_last", "channels_first"]: + raise NotImplementedError + self.normalized_shape = (normalized_shape,) + + def forward(self, x): + if self.data_format == "channels_last": + return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) + elif self.data_format == "channels_first": + u = x.mean(1, keepdim=True) + s = (x - u).pow(2).mean(1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.eps) + x = self.weight[:, None, None] * x + self.bias[:, None, None] + return x \ No newline at end of file diff --git a/mask_adapter/modeling/meta_arch/mask_adapter_head.py b/mask_adapter/modeling/meta_arch/mask_adapter_head.py new file mode 100644 index 0000000000000000000000000000000000000000..716db2b5b8ae2bc4be02b384cc2ab5d574c6d0a2 --- /dev/null +++ b/mask_adapter/modeling/meta_arch/mask_adapter_head.py @@ -0,0 +1,137 @@ +import logging +from copy import deepcopy +from typing import Callable, Dict, List, Optional, Tuple, Union + +import fvcore.nn.weight_init as weight_init +from torch import nn +from torch.nn import functional as F +import torch +from detectron2.config import configurable +from detectron2.layers import Conv2d, ShapeSpec, get_norm +from detectron2.modeling import SEM_SEG_HEADS_REGISTRY +import torch.utils.checkpoint as cp +from .convnext import ConvNextBlock +from einops import rearrange,repeat + +@SEM_SEG_HEADS_REGISTRY.register() +class MASKAdapterHead(nn.Module): + + @configurable + def __init__( + self, + clip_model_name, + mask_in_chans: int, + num_channels: int, + use_checkpoint: bool, + num_output_maps: int, + ): + """ + NOTE: this interface is experimental. + Args: + input_shape: shapes (channels and stride) of the input features + num_classes: number of classes to predict + pixel_decoder: the pixel decoder module + loss_weight: loss weight + ignore_value: category id to be ignored during training. 
+ transformer_predictor: the transformer decoder that makes prediction + transformer_in_feature: input feature name to the transformer_predictor + """ + super().__init__() + self.use_checkpoint = use_checkpoint + + if '_base' in clip_model_name: + clip_dim = 640 + elif '_large' in clip_model_name: + clip_dim = 768 + + self.fuse = nn.Conv2d(clip_dim, num_channels, 1) + + self.cnext1 = ConvNextBlock(num_channels) + + self.cnext2 = ConvNextBlock(num_channels) + + self.cnext3 = ConvNextBlock(num_channels) + + self.norm = nn.LayerNorm(num_channels) + self.final = nn.Conv2d(num_channels, num_output_maps, 1) + + self.mask_downscaling = nn.Sequential( + nn.Conv2d(1, mask_in_chans // 4, kernel_size=3, stride=2, padding=1), + LayerNorm2d(mask_in_chans // 4), + nn.GELU(), + nn.Conv2d(mask_in_chans // 4, mask_in_chans, kernel_size=3, stride=2, padding=1), + LayerNorm2d(mask_in_chans), + nn.GELU(), + nn.Conv2d(mask_in_chans, clip_dim, kernel_size=1), + ) + + + @classmethod + def from_config(cls, cfg): + + return { + "clip_model_name": cfg.MODEL.FC_CLIP.CLIP_MODEL_NAME, + "mask_in_chans": cfg.MODEL.MASK_ADAPTER.MASK_IN_CHANNELS, + "num_channels": cfg.MODEL.MASK_ADAPTER.NUM_CHANNELS, + "use_checkpoint": cfg.MODEL.MASK_ADAPTER.USE_CHECKPOINT, + "num_output_maps": cfg.MODEL.MASK_ADAPTER.NUM_OUTPUT_MAPS, + } + + def forward(self, clip_feature, masks): + + + N = masks.size(1) + masks = rearrange(masks, 'B N H W -> (B N) H W').unsqueeze(dim=1) + + clip_feature = repeat(clip_feature, "B C H W -> (B N) C H W", N=N) + + H,W = clip_feature.shape[-2:] + masks = F.interpolate(masks.float(), size=(H*4,W*4), + mode='bilinear', align_corners=False) + masks = self.mask_downscaling(masks) + + outputs = clip_feature + masks + + def _inner_forward(outputs): + outputs = self.fuse(outputs) + + outputs = self.cnext1(outputs) + + outputs = self.cnext2(outputs) + + outputs = self.cnext3(outputs) + + outputs = outputs.permute(0, 2, 3, 1) + outputs = self.norm(outputs.contiguous()) + outputs = outputs.permute(0, 3, 1, 2) + + outputs = self.final(outputs.contiguous()) + + outputs = rearrange(outputs, '(B N) C H W -> B (N C) H W',N=N) + + return outputs + + if self.use_checkpoint and self.training: + outputs = cp.checkpoint(_inner_forward, outputs,use_reentrant=False) + else: + outputs = _inner_forward(outputs) + return outputs + +def build_mask_adapter(cfg,name): + return SEM_SEG_HEADS_REGISTRY.get(name)(cfg) + +# From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa +# Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa +class LayerNorm2d(nn.Module): + def __init__(self, num_channels: int, eps: float = 1e-6) -> None: + super().__init__() + self.weight = nn.Parameter(torch.ones(num_channels)) + self.bias = nn.Parameter(torch.zeros(num_channels)) + self.eps = eps + + def forward(self, x: torch.Tensor) -> torch.Tensor: + u = x.mean(1, keepdim=True) + s = (x - u).pow(2).mean(1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.eps) + x = self.weight[:, None, None] * x + self.bias[:, None, None] + return x \ No newline at end of file diff --git a/mask_adapter/sam_maskadapter.py b/mask_adapter/sam_maskadapter.py new file mode 100644 index 0000000000000000000000000000000000000000..c5c03686a692402bf22823b6e5c23a2c550abef8 --- /dev/null +++ b/mask_adapter/sam_maskadapter.py @@ -0,0 +1,362 @@ +import numpy as np +import torch +from torch.nn import functional as F +import cv2 + +from detectron2.data import 
MetadataCatalog +from detectron2.structures import BitMasks +from detectron2.utils.visualizer import ColorMode, Visualizer + +import open_clip +from sam2.build_sam import build_sam2 +from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator +from .modeling.meta_arch.mask_adapter_head import build_mask_adapter +from sam2.sam2_image_predictor import SAM2ImagePredictor + + +from PIL import Image + +PIXEL_MEAN = [122.7709383, 116.7460125, 104.09373615] +PIXEL_STD = [68.5005327, 66.6321579, 70.32316305] + +class OpenVocabVisualizer(Visualizer): + def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE, class_names=None): + super().__init__(img_rgb, metadata, scale, instance_mode) + self.class_names = class_names + + def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.6): + """ + Draw semantic segmentation predictions/labels. + Args: + sem_seg (Tensor or ndarray): the segmentation of shape (H, W). + Each value is the integer label of the pixel. + area_threshold (int): segments with less than `area_threshold` are not drawn. + alpha (float): the larger it is, the more opaque the segmentations are. + Returns: + output (VisImage): image object with visualizations. + """ + if isinstance(sem_seg, torch.Tensor): + sem_seg = sem_seg.numpy() + labels, areas = np.unique(sem_seg, return_counts=True) + sorted_idxs = np.argsort(-areas).tolist() + labels = labels[sorted_idxs] + class_names = self.class_names if self.class_names is not None else self.metadata.stuff_classes + + for label in filter(lambda l: l < len(class_names), labels): + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] + except (AttributeError, IndexError): + mask_color = None + + binary_mask = (sem_seg == label).astype(np.uint8) + text = class_names[label] + self.draw_binary_mask( + binary_mask, + color=mask_color, + edge_color=(1.0, 1.0, 240.0 / 255), + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + return self.output + + +class SAMVisualizationDemo(object): + def __init__(self, cfg, granularity, sam2, clip_model ,mask_adapter, instance_mode=ColorMode.IMAGE, parallel=False,): + self.metadata = MetadataCatalog.get( + cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" + ) + + self.cpu_device = torch.device("cpu") + self.instance_mode = instance_mode + + self.parallel = parallel + self.granularity = granularity + + self.sam2 = sam2 + self.predictor = SAM2AutomaticMaskGenerator(sam2, points_per_batch=16, + pred_iou_thresh=0.8, + stability_score_thresh=0.7, + crop_n_layers=0, + crop_n_points_downscale_factor=2, + min_mask_region_area=100) + + self.clip_model = clip_model + self.mask_adapter = mask_adapter + + + + def extract_features_convnext(self, x): + out = {} + x = self.clip_model.visual.trunk.stem(x) + out['stem'] = x.contiguous() # os4 + for i in range(4): + x = self.clip_model.visual.trunk.stages[i](x) + out[f'res{i+2}'] = x.contiguous() # res 2 (os4), 3 (os8), 4 (os16), 5 (os32) + + x = self.clip_model.visual.trunk.norm_pre(x) + out['clip_vis_dense'] = x.contiguous() + return out + + def visual_prediction_forward_convnext(self, x): + batch, num_query, channel = x.shape + x = x.reshape(batch*num_query, channel, 1, 1) # fake 2D input + x = self.clip_model.visual.trunk.head(x) + x = self.clip_model.visual.head(x) + return x.view(batch, num_query, x.shape[-1]) # B x num_queries x 640 + + def visual_prediction_forward_convnext_2d(self, x): + + clip_vis_dense = self.clip_model.visual.trunk.head.norm(x) + clip_vis_dense = 
self.clip_model.visual.trunk.head.drop(clip_vis_dense.permute(0, 2, 3, 1)) + clip_vis_dense = self.clip_model.visual.head(clip_vis_dense).permute(0, 3, 1, 2) + + return clip_vis_dense + + def run_on_image(self, ori_image, class_names): + height, width, _ = ori_image.shape + if width > height: + new_width = 896 + new_height = int((new_width / width) * height) + else: + new_height = 896 + new_width = int((new_height / height) * width) + image = cv2.resize(ori_image, (new_width, new_height)) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + ori_image = cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB) + visualizer = OpenVocabVisualizer(ori_image, self.metadata, instance_mode=self.instance_mode, class_names=class_names) + with torch.no_grad():#, torch.cuda.amp.autocast(): + masks = self.predictor.generate(image) + pred_masks = [masks[i]['segmentation'][None,:,:] for i in range(len(masks))] + pred_masks = np.row_stack(pred_masks) + pred_masks = BitMasks(pred_masks) + + image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) + + pixel_mean = torch.tensor(PIXEL_MEAN).view(-1, 1, 1) + pixel_std = torch.tensor(PIXEL_STD).view(-1, 1, 1) + + image = (image - pixel_mean) / pixel_std + + image = image.unsqueeze(0) + + if len(class_names) == 1: + class_names.append('others') + txts = [f'a photo of {cls_name}' for cls_name in class_names] + text = open_clip.tokenize(txts) + + + with torch.no_grad(): + self.clip_model.cuda() + text_features = self.clip_model.encode_text(text.cuda()) + text_features /= text_features.norm(dim=-1, keepdim=True) + + features = self.extract_features_convnext(image.cuda().float()) + + clip_feature = features['clip_vis_dense'] + + clip_vis_dense = self.visual_prediction_forward_convnext_2d(clip_feature) + + semantic_activation_maps = self.mask_adapter(clip_vis_dense, pred_masks.tensor.unsqueeze(0).float().cuda()) + + maps_for_pooling = F.interpolate(semantic_activation_maps, size=clip_feature.shape[-2:], + mode='bilinear', align_corners=False) + + B, C = clip_feature.size(0),clip_feature.size(1) + N = maps_for_pooling.size(1) + num_instances = N // 16 + maps_for_pooling = F.softmax(F.logsigmoid(maps_for_pooling).view(B, N,-1), dim=-1) + pooled_clip_feature = torch.bmm(maps_for_pooling, clip_feature.view(B, C, -1).permute(0, 2, 1)) + pooled_clip_feature = self.visual_prediction_forward_convnext(pooled_clip_feature) + pooled_clip_feature = (pooled_clip_feature.reshape(B,num_instances, 16, -1).mean(dim=-2).contiguous()) + + class_preds = (100.0 * pooled_clip_feature @ text_features.T).softmax(dim=-1) + class_preds = class_preds.squeeze(0) + select_cls = torch.zeros_like(class_preds) + + max_scores, select_mask = torch.max(class_preds, dim=0) + if len(class_names) == 2 and class_names[-1] == 'others': + select_mask = select_mask[:-1] + if self.granularity < 1: + thr_scores = max_scores * self.granularity + select_mask = [] + if len(class_names) == 2 and class_names[-1] == 'others': + thr_scores = thr_scores[:-1] + for i, thr in enumerate(thr_scores): + cls_pred = class_preds[:,i] + locs = torch.where(cls_pred > thr) + select_mask.extend(locs[0].tolist()) + for idx in select_mask: + select_cls[idx] = class_preds[idx] + semseg = torch.einsum("qc,qhw->chw", select_cls.float(), pred_masks.tensor.float().cuda()) + + r = semseg + blank_area = (r[0] == 0) + pred_mask = r.argmax(dim=0).to('cpu') + pred_mask[blank_area] = 255 + pred_mask = np.array(pred_mask, dtype=int) + pred_mask = cv2.resize(pred_mask, (width, height), interpolation=cv2.INTER_NEAREST) + + vis_output = 
visualizer.draw_sem_seg( + pred_mask + ) + + return None, vis_output + + + +class SAMPointVisualizationDemo(object): + def __init__(self, cfg, granularity, sam2, clip_model ,mask_adapter, instance_mode=ColorMode.IMAGE, parallel=False): + self.metadata = MetadataCatalog.get( + cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" + ) + + self.cpu_device = torch.device("cpu") + self.instance_mode = instance_mode + + self.parallel = parallel + self.granularity = granularity + + + self.sam2 = sam2 + + self.predictor = SAM2ImagePredictor(sam2) + + self.clip_model = clip_model + + self.mask_adapter = mask_adapter + + + from .data.datasets import openseg_classes + + COCO_CATEGORIES_pan = openseg_classes.get_coco_categories_with_prompt_eng() + #COCO_CATEGORIES_seg = openseg_classes.get_coco_stuff_categories_with_prompt_eng() + + thing_classes = [k["name"] for k in COCO_CATEGORIES_pan if k["isthing"] == 1] + stuff_classes = [k["name"] for k in COCO_CATEGORIES_pan] + #print(coco_metadata) + lvis_classes = open("./mask_adapter/data/datasets/lvis_1203_with_prompt_eng.txt", 'r').read().splitlines() + lvis_classes = [x[x.find(':')+1:] for x in lvis_classes] + + self.class_names = thing_classes + stuff_classes + lvis_classes + self.text_embedding = torch.from_numpy(np.load("./text_embedding/lvis_coco_text_embedding.npy")).to("cuda") + + self.class_names = self._load_class_names() + + def _load_class_names(self): + from .data.datasets import openseg_classes + COCO_CATEGORIES_pan = openseg_classes.get_coco_categories_with_prompt_eng() + thing_classes = [k["name"] for k in COCO_CATEGORIES_pan if k["isthing"] == 1] + stuff_classes = [k["name"] for k in COCO_CATEGORIES_pan] + lvis_classes = open("./mask_adapter/data/datasets/lvis_1203_with_prompt_eng.txt", 'r').read().splitlines() + lvis_classes = [x[x.find(':')+1:] for x in lvis_classes] + return thing_classes + stuff_classes + lvis_classes + + + def extract_features_convnext(self, x): + out = {} + x = self.clip_model.visual.trunk.stem(x) + out['stem'] = x.contiguous() # os4 + for i in range(4): + x = self.clip_model.visual.trunk.stages[i](x) + out[f'res{i+2}'] = x.contiguous() # res 2 (os4), 3 (os8), 4 (os16), 5 (os32) + + x = self.clip_model.visual.trunk.norm_pre(x) + out['clip_vis_dense'] = x.contiguous() + return out + + def visual_prediction_forward_convnext(self, x): + batch, num_query, channel = x.shape + x = x.reshape(batch*num_query, channel, 1, 1) # fake 2D input + x = self.clip_model.visual.trunk.head(x) + x = self.clip_model.visual.head(x) + return x.view(batch, num_query, x.shape[-1]) # B x num_queries x 640 + + def visual_prediction_forward_convnext_2d(self, x): + + clip_vis_dense = self.clip_model.visual.trunk.head.norm(x) + clip_vis_dense = self.clip_model.visual.trunk.head.drop(clip_vis_dense.permute(0, 2, 3, 1)) + clip_vis_dense = self.clip_model.visual.head(clip_vis_dense).permute(0, 3, 1, 2) + + return clip_vis_dense + + def run_on_image_with_points(self, ori_image, points): + height, width, _ = ori_image.shape + + image = ori_image + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + ori_image = cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB) + + input_point = np.array(points) + input_label = np.array([1]) + + with torch.no_grad(): + self.predictor.set_image(image) + masks, _, _ = self.predictor.predict(point_coords=input_point, point_labels=input_label, multimask_output=False) + + pred_masks = BitMasks(masks) + + image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) + + pixel_mean = torch.tensor(PIXEL_MEAN).view(-1, 1, 
1) + pixel_std = torch.tensor(PIXEL_STD).view(-1, 1, 1) + + image = (image - pixel_mean) / pixel_std + image = image.unsqueeze(0) + + # txts = [f'a photo of {cls_name}' for cls_name in self.class_names] + # text = open_clip.tokenize(txts) + + with torch.no_grad(): + self.clip_model.cuda() + # text_features = self.clip_model.encode_text(text.cuda()) + # text_features /= text_features.norm(dim=-1, keepdim=True) + #np.save("/home/yongkangli/Mask-Adapter/text_embedding/lvis_coco_text_embedding.npy", text_features.cpu().numpy()) + text_features = self.text_embedding + features = self.extract_features_convnext(image.cuda().float()) + clip_feature = features['clip_vis_dense'] + + clip_vis_dense = self.visual_prediction_forward_convnext_2d(clip_feature) + + semantic_activation_maps = self.mask_adapter(clip_vis_dense, pred_masks.tensor.unsqueeze(0).float().cuda()) + maps_for_pooling = F.interpolate(semantic_activation_maps, size=clip_feature.shape[-2:], mode='bilinear', align_corners=False) + + B, C = clip_feature.size(0), clip_feature.size(1) + N = maps_for_pooling.size(1) + num_instances = N // 16 + maps_for_pooling = F.softmax(F.logsigmoid(maps_for_pooling).view(B, N,-1), dim=-1) + pooled_clip_feature = torch.bmm(maps_for_pooling, clip_feature.view(B, C, -1).permute(0, 2, 1)) + pooled_clip_feature = self.visual_prediction_forward_convnext(pooled_clip_feature) + pooled_clip_feature = (pooled_clip_feature.reshape(B, num_instances, 16, -1).mean(dim=-2).contiguous()) + + class_preds = (100.0 * pooled_clip_feature @ text_features.T).softmax(dim=-1) + class_preds = class_preds.squeeze(0) + + # Resize mask to match original image size + pred_mask = cv2.resize(masks.squeeze(0), (width, height), interpolation=cv2.INTER_NEAREST) # Resize mask to match original image size + + # Create an overlay for the mask with a transparent background (using alpha transparency) + overlay = ori_image.copy() + mask_colored = np.zeros_like(ori_image) + mask_colored[pred_mask == 1] = [234, 103, 112] # Green color for the mask + + # Apply the mask with transparency (alpha blending) + alpha = 0.5 + cv2.addWeighted(mask_colored, alpha, overlay, 1 - alpha, 0, overlay) + + # Draw boundary (contours) on the overlay + contours, _ = cv2.findContours(pred_mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + cv2.drawContours(overlay, contours, -1, (255, 255, 255), 2) # White boundary + + # Add label based on the class with the highest score + max_scores, max_score_idx = class_preds.max(dim=1) # Find the max score across the class predictions + label = f"{self.class_names[max_score_idx.item()]}: {max_scores.item():.2f}" + + # Dynamically place the label near the clicked point + text_x = min(width - 200, points[0][0] + 20) # Add some offset from the point + text_y = min(height - 30, points[0][1] + 20) # Ensure the text does not go out of bounds + + # Put text near the point + cv2.putText(overlay, label, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) + + return None, Image.fromarray(overlay) \ No newline at end of file diff --git a/mask_adapter/test_time_augmentation.py b/mask_adapter/test_time_augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..76794c4589f196680363cc26dfacfbe0dd7c689d --- /dev/null +++ b/mask_adapter/test_time_augmentation.py @@ -0,0 +1,108 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. 
+ +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/test_time_augmentation.py +""" +import copy +import logging +from itertools import count + +import numpy as np +import torch +from fvcore.transforms import HFlipTransform +from torch import nn +from torch.nn.parallel import DistributedDataParallel + +from detectron2.data.detection_utils import read_image +from detectron2.modeling import DatasetMapperTTA + + +__all__ = [ + "SemanticSegmentorWithTTA", +] + + +class SemanticSegmentorWithTTA(nn.Module): + """ + A SemanticSegmentor with test-time augmentation enabled. + Its :meth:`__call__` method has the same interface as :meth:`SemanticSegmentor.forward`. + """ + + def __init__(self, cfg, model, tta_mapper=None, batch_size=1): + """ + Args: + cfg (CfgNode): + model (SemanticSegmentor): a SemanticSegmentor to apply TTA on. + tta_mapper (callable): takes a dataset dict and returns a list of + augmented versions of the dataset dict. Defaults to + `DatasetMapperTTA(cfg)`. + batch_size (int): batch the augmented images into this batch size for inference. + """ + super().__init__() + if isinstance(model, DistributedDataParallel): + model = model.module + self.cfg = cfg.clone() + + self.model = model + + if tta_mapper is None: + tta_mapper = DatasetMapperTTA(cfg) + self.tta_mapper = tta_mapper + self.batch_size = batch_size + + def __call__(self, batched_inputs): + """ + Same input/output format as :meth:`SemanticSegmentor.forward` + """ + + def _maybe_read_image(dataset_dict): + ret = copy.copy(dataset_dict) + if "image" not in ret: + image = read_image(ret.pop("file_name"), self.model.input_format) + image = torch.from_numpy(np.ascontiguousarray(image.transpose(2, 0, 1))) # CHW + ret["image"] = image + if "height" not in ret and "width" not in ret: + ret["height"] = image.shape[1] + ret["width"] = image.shape[2] + return ret + + processed_results = [] + for x in batched_inputs: + result = self._inference_one_image(_maybe_read_image(x)) + processed_results.append(result) + return processed_results + + def _inference_one_image(self, input): + """ + Args: + input (dict): one dataset dict with "image" field being a CHW tensor + Returns: + dict: one output dict + """ + orig_shape = (input["height"], input["width"]) + augmented_inputs, tfms = self._get_augmented_inputs(input) + + final_predictions = None + count_predictions = 0 + for input, tfm in zip(augmented_inputs, tfms): + count_predictions += 1 + with torch.no_grad(): + if final_predictions is None: + if any(isinstance(t, HFlipTransform) for t in tfm.transforms): + final_predictions = self.model([input])[0].pop("sem_seg").flip(dims=[2]) + else: + final_predictions = self.model([input])[0].pop("sem_seg") + else: + if any(isinstance(t, HFlipTransform) for t in tfm.transforms): + final_predictions += self.model([input])[0].pop("sem_seg").flip(dims=[2]) + else: + final_predictions += self.model([input])[0].pop("sem_seg") + + final_predictions = final_predictions / count_predictions + return {"sem_seg": final_predictions} + + def _get_augmented_inputs(self, input): + augmented_inputs = self.tta_mapper(input) + tfms = [x.pop("transforms") for x in augmented_inputs] + return augmented_inputs, tfms diff --git a/mask_adapter/utils/__init__.py b/mask_adapter/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be75f0cd9568f901b3174ecfb43c0b9f4fa1f77d --- /dev/null +++ b/mask_adapter/utils/__init__.py @@ -0,0 +1,15 @@ +""" +Copyright (2023) Bytedance Ltd. 
and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" \ No newline at end of file diff --git a/mask_adapter/utils/__pycache__/__init__.cpython-310.pyc b/mask_adapter/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46eba9a9eb95435f6fab13b818d35016218ced10 Binary files /dev/null and b/mask_adapter/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/utils/__pycache__/__init__.cpython-38.pyc b/mask_adapter/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..81ba5c9d803b2aa4f99879c53e5ef4b335f1ceca Binary files /dev/null and b/mask_adapter/utils/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/utils/__pycache__/misc.cpython-310.pyc b/mask_adapter/utils/__pycache__/misc.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d43fd81cc0559678a6bfebba5a6d251815cf63c5 Binary files /dev/null and b/mask_adapter/utils/__pycache__/misc.cpython-310.pyc differ diff --git a/mask_adapter/utils/__pycache__/misc.cpython-38.pyc b/mask_adapter/utils/__pycache__/misc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d170ba8f366e04c12b44aa73e4f75258436b7d25 Binary files /dev/null and b/mask_adapter/utils/__pycache__/misc.cpython-38.pyc differ diff --git a/mask_adapter/utils/misc.py b/mask_adapter/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..1420c9d568b2475dfd0a4bef92010be4015845b5 --- /dev/null +++ b/mask_adapter/utils/misc.py @@ -0,0 +1,114 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/utils/misc.py + +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. 
+""" +from typing import List, Optional + +import torch +import torch.distributed as dist +import torchvision +from torch import Tensor + + +def _max_by_axis(the_list): + # type: (List[List[int]]) -> List[int] + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + return maxes + + +class NestedTensor(object): + def __init__(self, tensors, mask: Optional[Tensor]): + self.tensors = tensors + self.mask = mask + + def to(self, device): + # type: (Device) -> NestedTensor # noqa + cast_tensor = self.tensors.to(device) + mask = self.mask + if mask is not None: + assert mask is not None + cast_mask = mask.to(device) + else: + cast_mask = None + return NestedTensor(cast_tensor, cast_mask) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): + # TODO make this more general + if tensor_list[0].ndim == 3: + if torchvision._is_tracing(): + # nested_tensor_from_tensor_list() does not export well to ONNX + # call _onnx_nested_tensor_from_tensor_list() instead + return _onnx_nested_tensor_from_tensor_list(tensor_list) + + # TODO make it support different-sized images + max_size = _max_by_axis([list(img.shape) for img in tensor_list]) + # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + dtype = tensor_list[0].dtype + device = tensor_list[0].device + tensor = torch.zeros(batch_shape, dtype=dtype, device=device) + mask = torch.ones((b, h, w), dtype=torch.bool, device=device) + for img, pad_img, m in zip(tensor_list, tensor, mask): + pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + m[: img.shape[1], : img.shape[2]] = False + else: + raise ValueError("not supported") + return NestedTensor(tensor, mask) + + +# _onnx_nested_tensor_from_tensor_list() is an implementation of +# nested_tensor_from_tensor_list() that is supported by ONNX tracing. 
+@torch.jit.unused +def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: + max_size = [] + for i in range(tensor_list[0].dim()): + max_size_i = torch.max( + torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32) + ).to(torch.int64) + max_size.append(max_size_i) + max_size = tuple(max_size) + + # work around for + # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + # m[: img.shape[1], :img.shape[2]] = False + # which is not yet supported in onnx + padded_imgs = [] + padded_masks = [] + for img in tensor_list: + padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] + padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) + padded_imgs.append(padded_img) + + m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) + padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) + padded_masks.append(padded_mask.to(torch.bool)) + + tensor = torch.stack(padded_imgs) + mask = torch.stack(padded_masks) + + return NestedTensor(tensor, mask=mask) + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True diff --git a/text_embedding/lvis_coco_text_embedding.npy b/text_embedding/lvis_coco_text_embedding.npy new file mode 100644 index 0000000000000000000000000000000000000000..cc87f080cad35cf8360299bcabd722e3b6fd903f --- /dev/null +++ b/text_embedding/lvis_coco_text_embedding.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:934d29f264f21160ef124aa9b08be88d25845bd8a1aba14ce691512704a4c671 +size 4350080
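For reference, a minimal standalone sketch of the mask-pooling and cosine-logit computation implemented by MaskPooling and get_classification_logits in this diff, using dummy tensors. The shapes, the thresholding, the 1e-8 denominator, and the logit-scale clamp mirror the code above; the concrete sizes and the 0.5 threshold are illustrative assumptions, not values taken from the repo.

import torch
import torch.nn.functional as F

B, C, H, W, Q, N = 2, 640, 24, 24, 5, 10      # batch, channels, feature map size, masks, classes (illustrative)
x = torch.randn(B, C, H, W)                   # dense CLIP features
mask = torch.rand(B, Q, H, W)                 # predicted mask probabilities

# MaskPooling: zero out pixels below the threshold, then average the features inside each mask
binary_mask = (mask > 0.5).to(mask.dtype)
mask = binary_mask * mask
denorm = mask.sum(dim=(-1, -2), keepdim=True) + 1e-8
pooled = torch.einsum("bchw,bqhw->bqc", x, mask / denorm)                 # [B, Q, C]

# get_classification_logits (single-template case): scaled cosine similarity to text embeddings
text_classifier = F.normalize(torch.randn(N, C), dim=-1)                  # [N, C]
logit_scale = torch.clamp(torch.tensor(4.6052).exp(), max=100)            # exp of CLIP's learnable log-scale, clamped at 100
logits = logit_scale * F.normalize(pooled, dim=-1) @ text_classifier.T    # [B, Q, N]
print(logits.shape)                                                       # torch.Size([2, 5, 10])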