diff --git a/assets/main_fig.png b/assets/main_fig.png new file mode 100644 index 0000000000000000000000000000000000000000..077a94b4988bd4a86e94da94b4c31f80de312398 Binary files /dev/null and b/assets/main_fig.png differ diff --git a/configs/ground-truth-warmup/Base-COCO-PanopticSegmentation.yaml b/configs/ground-truth-warmup/Base-COCO-PanopticSegmentation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0cad56bb9d119f7377133a57736e2ccb1b2ff890 --- /dev/null +++ b/configs/ground-truth-warmup/Base-COCO-PanopticSegmentation.yaml @@ -0,0 +1,60 @@ +MODEL: + BACKBONE: + FREEZE_AT: 0 + NAME: "build_resnet_backbone" + WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" + PIXEL_MEAN: [123.675, 116.280, 103.530] + PIXEL_STD: [58.395, 57.120, 57.375] + RESNETS: + DEPTH: 50 + STEM_TYPE: "basic" # not used + STEM_OUT_CHANNELS: 64 + STRIDE_IN_1X1: False + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + # NORM: "SyncBN" + RES5_MULTI_GRID: [1, 1, 1] # not used + +SOLVER: + IMS_PER_BATCH: 8 + BASE_LR: 0.0001 + STEPS: (260231, 283888) + MAX_ITER: 295717 + WARMUP_FACTOR: 1.0 + WARMUP_ITERS: 10 + CHECKPOINT_PERIOD: 10000 + WEIGHT_DECAY: 0.05 + OPTIMIZER: "ADAMW" + BACKBONE_MULTIPLIER: 0.1 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "full_model" + CLIP_VALUE: 1.0 + NORM_TYPE: 2.0 + AMP: + ENABLED: True +INPUT: + IMAGE_SIZE: 768 + MIN_SCALE: 0.1 + MAX_SCALE: 2.0 + FORMAT: "RGB" + MIN_SIZE_TRAIN: (1024,) + MAX_SIZE_TRAIN: 1024 + DATASET_MAPPER_NAME: "coco_combine_lsj" + MASK_FORMAT: "bitmask" + COLOR_AUG_SSD: True + +DATASETS: + TRAIN: ("openvocab_coco_2017_train_panoptic_with_sem_seg",) + TEST: ("openvocab_ade20k_panoptic_val",) # to evaluate instance and semantic performance as well +DATALOADER: + SAMPLER_TRAIN: "MultiDatasetSampler" + USE_DIFF_BS_SIZE: False + DATASET_RATIO: [1.0] + DATASET_BS: [2] + USE_RFS: [False] + NUM_WORKERS: 8 + DATASET_ANN: ['mask'] + ASPECT_RATIO_GROUPING: True +TEST: + EVAL_PERIOD: 10000 +VERSION: 2 diff --git a/configs/ground-truth-warmup/mask-adapter/mask_adapter_convnext_large_cocopan_eval_ade20k.yaml b/configs/ground-truth-warmup/mask-adapter/mask_adapter_convnext_large_cocopan_eval_ade20k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..964f88abe5542ec02c42271bf44f7a4b118bab3a --- /dev/null +++ b/configs/ground-truth-warmup/mask-adapter/mask_adapter_convnext_large_cocopan_eval_ade20k.yaml @@ -0,0 +1,40 @@ +_BASE_: ../maskformer2_R50_bs16_50ep.yaml +MODEL: + META_ARCHITECTURE: "MASK_Adapter" + MASK_ADAPTER: + NAME: "MASKAdapterHead" + MASK_IN_CHANNELS: 16 + NUM_CHANNELS: 768 + USE_CHECKPOINT: False + NUM_OUTPUT_MAPS: 16 + # backbone part. 
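+  # CLIP ConvNeXt-L backbone from OpenCLIP; the exact model/pretrained tags are set under FC_CLIP below.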
+ BACKBONE: + NAME: "CLIP" + WEIGHTS: "" + PIXEL_MEAN: [122.7709383, 116.7460125, 104.09373615] + PIXEL_STD: [68.5005327, 66.6321579, 70.32316305] + FC_CLIP: + CLIP_MODEL_NAME: "convnext_large_d_320" + CLIP_PRETRAINED_WEIGHTS: "laion2b_s29b_b131k_ft_soup" + EMBED_DIM: 768 + GEOMETRIC_ENSEMBLE_ALPHA: -1.0 + GEOMETRIC_ENSEMBLE_BETA: -1.0 + MASK_FORMER: + NUM_OBJECT_QUERIES: 250 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OVERLAP_THRESHOLD: 0.8 + OBJECT_MASK_THRESHOLD: 0.0 + +INPUT: + DATASET_MAPPER_NAME: "coco_panoptic_lsj" + +DATALOADER: + SAMPLER_TRAIN: "TrainingSampler" + +DATASETS: + TRAIN: ("openvocab_coco_2017_train_panoptic_with_sem_seg",) + TEST: ("openvocab_ade20k_panoptic_val",) +OUTPUT_DIR: ./training/first-phase/fcclip-l-adapter diff --git a/configs/ground-truth-warmup/mask-adapter/mask_adapter_maft_convnext_base_cocostuff_eval_ade20k.yaml b/configs/ground-truth-warmup/mask-adapter/mask_adapter_maft_convnext_base_cocostuff_eval_ade20k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7dc36d52d84eda8be17b86b30fbfcb2fe3cb5f96 --- /dev/null +++ b/configs/ground-truth-warmup/mask-adapter/mask_adapter_maft_convnext_base_cocostuff_eval_ade20k.yaml @@ -0,0 +1,40 @@ +_BASE_: ../maskformer2_R50_bs16_50ep.yaml +MODEL: + META_ARCHITECTURE: "MASK_Adapter" + MASK_ADAPTER: + NAME: "MASKAdapterHead" + MASK_IN_CHANNELS: 16 + NUM_CHANNELS: 768 + USE_CHECKPOINT: False + NUM_OUTPUT_MAPS: 16 + TRAIN_MAFT: True + # backbone part. + BACKBONE: + NAME: "CLIP" + WEIGHTS: "" + PIXEL_MEAN: [122.7709383, 116.7460125, 104.09373615] + PIXEL_STD: [68.5005327, 66.6321579, 70.32316305] + FC_CLIP: + CLIP_MODEL_NAME: "convnext_base_w_320" + CLIP_PRETRAINED_WEIGHTS: "laion_aesthetic_s13b_b82k_augreg" + EMBED_DIM: 640 + GEOMETRIC_ENSEMBLE_ALPHA: -1.0 + GEOMETRIC_ENSEMBLE_BETA: -1.0 + MASK_FORMER: + NUM_OBJECT_QUERIES: 250 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OVERLAP_THRESHOLD: 0.8 + OBJECT_MASK_THRESHOLD: 0.0 + +INPUT: + DATASET_MAPPER_NAME: "mask_former_semantic" + +DATASETS: + TRAIN: ("openvocab_coco_2017_train_stuff_sem_seg",) + TEST: ("openvocab_ade20k_panoptic_val",) +DATALOADER: + SAMPLER_TRAIN: "TrainingSampler" +OUTPUT_DIR: ./training/first-phase/maft_b_adapter diff --git a/configs/ground-truth-warmup/mask-adapter/mask_adapter_maft_convnext_large_cocostuff_eval_ade20k.yaml b/configs/ground-truth-warmup/mask-adapter/mask_adapter_maft_convnext_large_cocostuff_eval_ade20k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3f8dd143851bb5a9d2ba9c71d0b36440380cd7cb --- /dev/null +++ b/configs/ground-truth-warmup/mask-adapter/mask_adapter_maft_convnext_large_cocostuff_eval_ade20k.yaml @@ -0,0 +1,40 @@ +_BASE_: ../maskformer2_R50_bs16_50ep.yaml +MODEL: + META_ARCHITECTURE: "MASK_Adapter" + MASK_ADAPTER: + NAME: "MASKAdapterHead" + MASK_IN_CHANNELS: 16 + NUM_CHANNELS: 768 + USE_CHECKPOINT: False + NUM_OUTPUT_MAPS: 16 + TRAIN_MAFT: True + # backbone part. 
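+  # Same CLIP ConvNeXt-L backbone; TRAIN_MAFT above switches the warmup to the MAFT-Plus variant.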
+ BACKBONE: + NAME: "CLIP" + WEIGHTS: "" + PIXEL_MEAN: [122.7709383, 116.7460125, 104.09373615] + PIXEL_STD: [68.5005327, 66.6321579, 70.32316305] + FC_CLIP: + CLIP_MODEL_NAME: "convnext_large_d_320" + CLIP_PRETRAINED_WEIGHTS: "laion2b_s29b_b131k_ft_soup" + EMBED_DIM: 768 + GEOMETRIC_ENSEMBLE_ALPHA: -1.0 + GEOMETRIC_ENSEMBLE_BETA: -1.0 + MASK_FORMER: + NUM_OBJECT_QUERIES: 250 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OVERLAP_THRESHOLD: 0.8 + OBJECT_MASK_THRESHOLD: 0.0 + +INPUT: + DATASET_MAPPER_NAME: "mask_former_semantic" + +DATASETS: + TRAIN: ("openvocab_coco_2017_train_stuff_sem_seg",) + TEST: ("openvocab_ade20k_panoptic_val",) +DATALOADER: + SAMPLER_TRAIN: "TrainingSampler" +OUTPUT_DIR: ./training/first-phase/maft_l_adapter diff --git a/configs/ground-truth-warmup/maskformer2_R50_bs16_50ep.yaml b/configs/ground-truth-warmup/maskformer2_R50_bs16_50ep.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ce7b5251c23441d93b709a06101b260edeac58cc --- /dev/null +++ b/configs/ground-truth-warmup/maskformer2_R50_bs16_50ep.yaml @@ -0,0 +1,45 @@ +_BASE_: Base-COCO-PanopticSegmentation.yaml +MODEL: + META_ARCHITECTURE: "MaskFormer" + SEM_SEG_HEAD: + NAME: "FCCLIPMASKHead" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + IGNORE_VALUE: 255 + NUM_CLASSES: 133 + LOSS_WEIGHT: 1.0 + CONVS_DIM: 256 + MASK_DIM: 256 + NORM: "GN" + # pixel decoder + PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] + COMMON_STRIDE: 4 + TRANSFORMER_ENC_LAYERS: 6 + MASK_FORMER: + TRANSFORMER_DECODER_NAME: "MultiScaleMaskedTransformerDecoder" + TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" + DEEP_SUPERVISION: True + NO_OBJECT_WEIGHT: 0.1 + CLASS_WEIGHT: 2.0 + MASK_WEIGHT: 5.0 + DICE_WEIGHT: 5.0 + HIDDEN_DIM: 256 + NUM_OBJECT_QUERIES: 100 + NHEADS: 8 + DROPOUT: 0.0 + DIM_FEEDFORWARD: 2048 + ENC_LAYERS: 0 + PRE_NORM: False + ENFORCE_INPUT_PROJ: False + SIZE_DIVISIBILITY: 32 + DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query + TRAIN_NUM_POINTS: 12544 + OVERSAMPLE_RATIO: 3.0 + IMPORTANCE_SAMPLE_RATIO: 0.75 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OVERLAP_THRESHOLD: 0.8 + OBJECT_MASK_THRESHOLD: 0.8 diff --git a/configs/mixed-mask-training/fc-clip/Base-COCO-PanopticSegmentation.yaml b/configs/mixed-mask-training/fc-clip/Base-COCO-PanopticSegmentation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ce8f18dfc84ad205607ae46348175eda40996a1 --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/Base-COCO-PanopticSegmentation.yaml @@ -0,0 +1,49 @@ +MODEL: + BACKBONE: + FREEZE_AT: 0 + NAME: "build_resnet_backbone" + WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" + PIXEL_MEAN: [123.675, 116.280, 103.530] + PIXEL_STD: [58.395, 57.120, 57.375] + RESNETS: + DEPTH: 50 + STEM_TYPE: "basic" # not used + STEM_OUT_CHANNELS: 64 + STRIDE_IN_1X1: False + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + # NORM: "SyncBN" + RES5_MULTI_GRID: [1, 1, 1] # not used +DATASETS: + TRAIN: ("openvocab_coco_2017_train_stuff_sem_seg",) + TEST: ("openvocab_ade20k_panoptic_val",) # to evaluate instance and semantic performance as well +SOLVER: + IMS_PER_BATCH: 18 + BASE_LR: 0.0001 + STEPS: (216859, 236574) + MAX_ITER: 246431 + WARMUP_FACTOR: 1.0 + WARMUP_ITERS: 10 + WEIGHT_DECAY: 0.05 + OPTIMIZER: "ADAMW" + BACKBONE_MULTIPLIER: 0.1 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "full_model" + 
CLIP_VALUE: 1.0 + NORM_TYPE: 2.0 + AMP: + ENABLED: True +INPUT: + IMAGE_SIZE: 1024 + MIN_SCALE: 0.1 + MAX_SCALE: 2.0 + MIN_SIZE_TEST: 896 + MAX_SIZE_TEST: 896 + FORMAT: "RGB" + DATASET_MAPPER_NAME: "coco_panoptic_lsj" +TEST: + EVAL_PERIOD: 5000 +DATALOADER: + FILTER_EMPTY_ANNOTATIONS: True + NUM_WORKERS: 4 +VERSION: 2 diff --git a/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_a847.yaml b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_a847.yaml new file mode 100644 index 0000000000000000000000000000000000000000..752df354115a333886ffd8ec4c1e6f8a0f06f411 --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_a847.yaml @@ -0,0 +1,12 @@ +_BASE_: ./fcclip_convnext_large_eval_ade20k.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_ade20k_full_sem_seg_val",) + +OUTPUT_DIR: ./evaluation/fc-clip/a847 \ No newline at end of file diff --git a/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_ade20k.yaml b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_ade20k.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7daac1b8d3d2ec3f922e107a416939b585661e38 --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_ade20k.yaml @@ -0,0 +1,55 @@ +_BASE_: ../maskformer2_R50_bs16_50ep.yaml +MODEL: + META_ARCHITECTURE: "FCCLIP" + SEM_SEG_HEAD: + NAME: "FCCLIPHead" + # backbone part. + MASK_ADAPTER: + NAME: "MASKAdapterHead" + MASK_IN_CHANNELS: 16 + NUM_CHANNELS: 768 + USE_CHECKPOINT: False + NUM_OUTPUT_MAPS: 16 + MASK_THRESHOLD: 0.5 + BACKBONE: + NAME: "CLIP" + WEIGHTS: "" + PIXEL_MEAN: [122.7709383, 116.7460125, 104.09373615] + PIXEL_STD: [68.5005327, 66.6321579, 70.32316305] + FC_CLIP: + CLIP_MODEL_NAME: "convnext_large_d_320" + CLIP_PRETRAINED_WEIGHTS: "laion2b_s29b_b131k_ft_soup" + EMBED_DIM: 768 + GEOMETRIC_ENSEMBLE_ALPHA: 0.7 + GEOMETRIC_ENSEMBLE_BETA: 0.9 + MASK_FORMER: + NUM_OBJECT_QUERIES: 250 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OBJECT_MASK_THRESHOLD: 0.0 + +INPUT: + IMAGE_SIZE: 1024 + MIN_SCALE: 0.1 + MAX_SCALE: 2.0 + COLOR_AUG_SSD: False +SOLVER: + IMS_PER_BATCH: 24 + BASE_LR: 0.0001 + WARMUP_FACTOR: 1.0 + WARMUP_ITERS: 0 + WEIGHT_DECAY: 0.05 + STEPS: (86743, 94629) + MAX_ITER: 98572 + CHECKPOINT_PERIOD: 3300 +TEST: + EVAL_PERIOD: 3300 + +#SEED: 9782623 +DATASETS: + TRAIN: ("openvocab_coco_2017_train_panoptic_with_sem_seg",) + TEST: ("openvocab_ade20k_panoptic_val",) + +OUTPUT_DIR: ./evaluation/fc-clip/ade20k \ No newline at end of file diff --git a/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_coco.yaml b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_coco.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b559cf5bd9fc92c41e8975ac029e91cfba0ef281 --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_coco.yaml @@ -0,0 +1,4 @@ +_BASE_: ./fcclip_convnext_large_eval_ade20k.yaml +DATASETS: + TEST: ("openvocab_coco_2017_val_panoptic_with_sem_seg",) +OUTPUT_DIR: ./coco-test \ No newline at end of file diff --git a/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pas20.yaml b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pas20.yaml new file mode 100644 index 0000000000000000000000000000000000000000..da3f65e00cb76c79e891b137db693ded90b70b4b --- /dev/null +++ 
b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pas20.yaml @@ -0,0 +1,12 @@ +_BASE_: ./fcclip_convnext_large_eval_ade20k.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal20_sem_seg_val",) + +OUTPUT_DIR: ./evaluation/fc-clip/pas20 \ No newline at end of file diff --git a/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pc459.yaml b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pc459.yaml new file mode 100644 index 0000000000000000000000000000000000000000..918e90084974159bf1f879291279e21168ca5abc --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pc459.yaml @@ -0,0 +1,12 @@ +_BASE_: ./fcclip_convnext_large_eval_ade20k.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal_ctx459_sem_seg_val",) + +OUTPUT_DIR: ./evaluation/fc-clip/pc459 \ No newline at end of file diff --git a/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pc59.yaml b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pc59.yaml new file mode 100644 index 0000000000000000000000000000000000000000..44920a151e40335a60ee073715f5d42767ab65a9 --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_pc59.yaml @@ -0,0 +1,12 @@ +_BASE_: ./fcclip_convnext_large_eval_ade20k.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal_ctx59_sem_seg_val",) + +OUTPUT_DIR: ./evaluation/fc-clip/pc59 \ No newline at end of file diff --git a/configs/mixed-mask-training/fc-clip/maskformer2_R50_bs16_50ep.yaml b/configs/mixed-mask-training/fc-clip/maskformer2_R50_bs16_50ep.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9ebf4f1114fc9ac2dd7a706acf0643559563754c --- /dev/null +++ b/configs/mixed-mask-training/fc-clip/maskformer2_R50_bs16_50ep.yaml @@ -0,0 +1,45 @@ +_BASE_: Base-COCO-PanopticSegmentation.yaml +MODEL: + META_ARCHITECTURE: "MaskFormer" + SEM_SEG_HEAD: + NAME: "MaskFormerHead" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + IGNORE_VALUE: 255 + NUM_CLASSES: 133 + LOSS_WEIGHT: 1.0 + CONVS_DIM: 256 + MASK_DIM: 256 + NORM: "GN" + # pixel decoder + PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] + COMMON_STRIDE: 4 + TRANSFORMER_ENC_LAYERS: 6 + MASK_FORMER: + TRANSFORMER_DECODER_NAME: "MultiScaleMaskedTransformerDecoder" + TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" + DEEP_SUPERVISION: True + NO_OBJECT_WEIGHT: 0.1 + CLASS_WEIGHT: 2.0 + MASK_WEIGHT: 5.0 + DICE_WEIGHT: 5.0 + HIDDEN_DIM: 256 + NUM_OBJECT_QUERIES: 100 + NHEADS: 8 + DROPOUT: 0.0 + DIM_FEEDFORWARD: 2048 + ENC_LAYERS: 0 + PRE_NORM: False + ENFORCE_INPUT_PROJ: False + SIZE_DIVISIBILITY: 32 + DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query + TRAIN_NUM_POINTS: 12544 + OVERSAMPLE_RATIO: 3.0 + IMPORTANCE_SAMPLE_RATIO: 0.75 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OVERLAP_THRESHOLD: 0.8 + OBJECT_MASK_THRESHOLD: 0.8 diff --git a/configs/mixed-mask-training/maftp/Base-COCO-PanopticSegmentation.yaml b/configs/mixed-mask-training/maftp/Base-COCO-PanopticSegmentation.yaml new file mode 100644 index 0000000000000000000000000000000000000000..45e65acc5262605a638f1259c4f325df8a4e2bda --- /dev/null +++ 
b/configs/mixed-mask-training/maftp/Base-COCO-PanopticSegmentation.yaml @@ -0,0 +1,62 @@ +MODEL: + BACKBONE: + FREEZE_AT: 0 + NAME: "CLIP" + # WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl" + PIXEL_MEAN: [122.7709383, 116.7460125, 104.09373615] + PIXEL_STD: [68.5005327, 66.6321579, 70.32316305] + RESNETS: + DEPTH: 50 + STEM_TYPE: "basic" # not used + STEM_OUT_CHANNELS: 64 + STRIDE_IN_1X1: False + OUT_FEATURES: ["res2", "res3", "res4", "res5"] + # NORM: "SyncBN" + RES5_MULTI_GRID: [1, 1, 1] # not used +DATASETS: + TRAIN: ("coco_2017_train_panoptic",) + TEST: ("coco_2017_val_panoptic_with_sem_seg",) # to evaluate instance and semantic performance as well +SOLVER: + IMS_PER_BATCH: 8 + BASE_LR: 0.0001 + BIAS_LR_FACTOR: 1.0 + CHECKPOINT_PERIOD: 50000000 + MAX_ITER: 55000 + LR_SCHEDULER_NAME: WarmupPolyLR + MOMENTUM: 0.9 + NESTEROV: false + OPTIMIZER: ADAMW + POLY_LR_CONSTANT_ENDING: 0.0 + POLY_LR_POWER: 0.9 + REFERENCE_WORLD_SIZE: 0 + WARMUP_FACTOR: 1.0 + WARMUP_ITERS: 10 + WARMUP_METHOD: linear + WEIGHT_DECAY: 2.0e-05 + #WEIGHT_DECAY: 0.05 + WEIGHT_DECAY_BIAS: null + WEIGHT_DECAY_EMBED: 0.0 + WEIGHT_DECAY_NORM: 0.0 + STEPS: (327778, 355092) + BACKBONE_MULTIPLIER: 0.1 + CLIP_GRADIENTS: + ENABLED: True + CLIP_TYPE: "full_model" + CLIP_VALUE: 1.0 + NORM_TYPE: 2.0 + AMP: + ENABLED: True +INPUT: + IMAGE_SIZE: 1024 + MIN_SCALE: 0.1 + MAX_SCALE: 2.0 + MIN_SIZE_TEST: 896 + MAX_SIZE_TEST: 896 + FORMAT: "RGB" + DATASET_MAPPER_NAME: "coco_panoptic_lsj" +TEST: + EVAL_PERIOD: 5000 +DATALOADER: + FILTER_EMPTY_ANNOTATIONS: True + NUM_WORKERS: 8 +VERSION: 2 diff --git a/configs/mixed-mask-training/maftp/maskformer2_R50_bs16_50ep.yaml b/configs/mixed-mask-training/maftp/maskformer2_R50_bs16_50ep.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f3d8c8c992aed047c90f31b8a8d039c338a1897e --- /dev/null +++ b/configs/mixed-mask-training/maftp/maskformer2_R50_bs16_50ep.yaml @@ -0,0 +1,45 @@ +_BASE_: Base-COCO-PanopticSegmentation.yaml +MODEL: + META_ARCHITECTURE: "MaskFormer" + SEM_SEG_HEAD: + NAME: "MaskFormerHead" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + IGNORE_VALUE: 255 + NUM_CLASSES: 133 + LOSS_WEIGHT: 1.0 + CONVS_DIM: 256 + MASK_DIM: 256 + NORM: "GN" + # pixel decoder + PIXEL_DECODER_NAME: "MSDeformAttnPixelDecoder" + IN_FEATURES: ["res2", "res3", "res4", "res5"] + DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES: ["res3", "res4", "res5"] + COMMON_STRIDE: 4 + TRANSFORMER_ENC_LAYERS: 6 + MASK_FORMER: + TRANSFORMER_DECODER_NAME: "MultiScaleMaskedTransformerDecoder" + TRANSFORMER_IN_FEATURE: "multi_scale_pixel_decoder" + DEEP_SUPERVISION: True + NO_OBJECT_WEIGHT: 0.1 + CLASS_WEIGHT: 2.0 + MASK_WEIGHT: 5.0 + DICE_WEIGHT: 5.0 + HIDDEN_DIM: 256 + NUM_OBJECT_QUERIES: 100 + NHEADS: 8 + DROPOUT: 0.0 + DIM_FEEDFORWARD: 2048 + ENC_LAYERS: 0 + PRE_NORM: False + ENFORCE_INPUT_PROJ: False + SIZE_DIVISIBILITY: 32 + DEC_LAYERS: 10 # 9 decoder layers, add one for the loss on learnable query + TRAIN_NUM_POINTS: 12544 + OVERSAMPLE_RATIO: 3.0 + IMPORTANCE_SAMPLE_RATIO: 0.75 + TEST: + SEMANTIC_ON: True + INSTANCE_ON: False + PANOPTIC_ON: False + OBJECT_MASK_THRESHOLD: 0.2 + OVERLAP_THRESHOLD: 0.7 \ No newline at end of file diff --git a/configs/mixed-mask-training/maftp/semantic/eval_a847.yaml b/configs/mixed-mask-training/maftp/semantic/eval_a847.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cddf3b8b5c80d760df70619dd6e2a4fb716b4627 --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/eval_a847.yaml @@ -0,0 +1,13 @@ +_BASE_: ./eval.yaml + +MODEL: + 
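+  # A-847 is evaluated as semantic segmentation only, so panoptic and instance inference are disabled below.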
MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_ade20k_full_sem_seg_val",) + + +OUTPUT_DIR: ./eval/a847 diff --git a/configs/mixed-mask-training/maftp/semantic/eval_pas20.yaml b/configs/mixed-mask-training/maftp/semantic/eval_pas20.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0593f856c3501606590e03863449ebb6da17d414 --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/eval_pas20.yaml @@ -0,0 +1,12 @@ +_BASE_: ./eval.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal20_sem_seg_val",) + +OUTPUT_DIR: ./eval/pas20 diff --git a/configs/mixed-mask-training/maftp/semantic/eval_pas21.yaml b/configs/mixed-mask-training/maftp/semantic/eval_pas21.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a2e2afdf8006464f1e04e7a39cae3d135fa7af92 --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/eval_pas21.yaml @@ -0,0 +1,13 @@ +_BASE_: ./eval.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal21_sem_seg_val",) + + +OUTPUT_DIR: ./eval/pas21 \ No newline at end of file diff --git a/configs/mixed-mask-training/maftp/semantic/eval_pc459.yaml b/configs/mixed-mask-training/maftp/semantic/eval_pc459.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f304ff5529877082d4972b0fcd14ff1f837d1bd --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/eval_pc459.yaml @@ -0,0 +1,12 @@ +_BASE_: ./eval.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal_ctx459_sem_seg_val",) + +OUTPUT_DIR: ./eval/pc459 \ No newline at end of file diff --git a/configs/mixed-mask-training/maftp/semantic/eval_pc59.yaml b/configs/mixed-mask-training/maftp/semantic/eval_pc59.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3d034def2d89c483db1680b5b91e44273c430c27 --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/eval_pc59.yaml @@ -0,0 +1,12 @@ +_BASE_: ./eval.yaml + +MODEL: + MASK_FORMER: + TEST: + PANOPTIC_ON: False + INSTANCE_ON: False + +DATASETS: + TEST: ("openvocab_pascal_ctx59_sem_seg_val",) + +OUTPUT_DIR: ./eval/pc59 \ No newline at end of file diff --git a/configs/mixed-mask-training/maftp/semantic/train_semantic_base_eval_a150.yaml b/configs/mixed-mask-training/maftp/semantic/train_semantic_base_eval_a150.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f0c7abd335b20f7ee70b147c1f64a450d7a7b7b --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/train_semantic_base_eval_a150.yaml @@ -0,0 +1,50 @@ +# python train_net.py --config-file configs/semantic/train_semantic_base.yaml --num-gpus 8 + +_BASE_: ../maskformer2_R50_bs16_50ep.yaml +MODEL: + META_ARCHITECTURE: "MAFT_Plus" # FCCLIP MAFT_Plus + SEM_SEG_HEAD: + NAME: "FCCLIPHead" + NUM_CLASSES: 171 + MASK_ADAPTER: + NAME: "MASKAdapterHead" + MASK_IN_CHANNELS: 16 + NUM_CHANNELS: 768 + USE_CHECKPOINT: False + NUM_OUTPUT_MAPS: 16 + MASK_THRESHOLD: 0.5 + FC_CLIP: + CLIP_MODEL_NAME: "convnext_base_w_320" + CLIP_PRETRAINED_WEIGHTS: "laion_aesthetic_s13b_b82k_augreg" + EMBED_DIM: 640 + GEOMETRIC_ENSEMBLE_ALPHA: 0.7 + GEOMETRIC_ENSEMBLE_BETA: 1.0 + rc_weights: 0.1 + MASK_FORMER: + TEST: + SEMANTIC_ON: True + INSTANCE_ON: False + PANOPTIC_ON: False + OBJECT_MASK_THRESHOLD: 0.0 + cdt_params: + - 640 + - 8 + +INPUT: + DATASET_MAPPER_NAME: "mask_former_semantic" # 
mask_former_semantic coco_panoptic_lsj +DATASETS: + TRAIN: ("openvocab_coco_2017_train_stuff_sem_seg",) + TEST: ('openvocab_ade20k_panoptic_val',) + +SOLVER: + IMS_PER_BATCH: 24 + BASE_LR: 0.0001 + STEPS: (43371, 47314) + MAX_ITER: 49286 + CHECKPOINT_PERIOD: 2500 +TEST: + EVAL_PERIOD: 2500 +INPUT: + DATASET_MAPPER_NAME: "mask_former_semantic" # +OUTPUT_DIR: ../evaluation/maftp-base/ade20k + \ No newline at end of file diff --git a/configs/mixed-mask-training/maftp/semantic/train_semantic_large_eval_a150.yaml b/configs/mixed-mask-training/maftp/semantic/train_semantic_large_eval_a150.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f56b3d79ffcbae811da7d881d15974a311b8306 --- /dev/null +++ b/configs/mixed-mask-training/maftp/semantic/train_semantic_large_eval_a150.yaml @@ -0,0 +1,46 @@ +# python train_net.py --config-file configs/semantic/train_semantic_large.yaml --num-gpus 8 + +_BASE_: ../maskformer2_R50_bs16_50ep.yaml +MODEL: + META_ARCHITECTURE: "MAFT_Plus" # FCCLIP MAFT_Plus + SEM_SEG_HEAD: + NAME: "FCCLIPHead" + NUM_CLASSES: 171 + MASK_ADAPTER: + NAME: "MASKAdapterHead" + MASK_IN_CHANNELS: 16 + NUM_CHANNELS: 768 + USE_CHECKPOINT: False + NUM_OUTPUT_MAPS: 16 + MASK_THRESHOLD: 0.5 + FC_CLIP: + CLIP_MODEL_NAME: "convnext_large_d_320" + CLIP_PRETRAINED_WEIGHTS: "laion2b_s29b_b131k_ft_soup" + EMBED_DIM: 768 + GEOMETRIC_ENSEMBLE_ALPHA: 0.8 + GEOMETRIC_ENSEMBLE_BETA: 1.0 + rc_weights: 0.1 + MASK_FORMER: + TEST: + SEMANTIC_ON: True + INSTANCE_ON: True + PANOPTIC_ON: True + OBJECT_MASK_THRESHOLD: 0.0 + +SOLVER: + IMS_PER_BATCH: 24 + BASE_LR: 0.0001 + STEPS: (43371, 47314) + MAX_ITER: 49286 + CHECKPOINT_PERIOD: 2500 +TEST: + EVAL_PERIOD: 2500 +INPUT: + DATASET_MAPPER_NAME: "mask_former_semantic" # mask_former_semantic coco_panoptic_lsj +DATASETS: + TRAIN: ("openvocab_coco_2017_train_stuff_sem_seg",) # openvocab_coco_2017_train_panoptic_with_sem_seg + TEST: ('openvocab_ade20k_panoptic_val',) + + + +OUTPUT_DIR: ../evaluation/maftp-large/ade20k diff --git a/demo/demo.py b/demo/demo.py new file mode 100644 index 0000000000000000000000000000000000000000..129d7f100c5ab22d259206e55c97b18dbe3ca49b --- /dev/null +++ b/demo/demo.py @@ -0,0 +1,201 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. 
+ +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/demo/demo.py +""" + +import argparse +import glob +import multiprocessing as mp +import os + +# fmt: off +import sys +sys.path.insert(1, os.path.join(sys.path[0], '..')) +# fmt: on + +import tempfile +import time +import warnings + +import cv2 +import numpy as np +import tqdm + +from detectron2.config import get_cfg +from detectron2.data.detection_utils import read_image +from detectron2.projects.deeplab import add_deeplab_config +from detectron2.utils.logger import setup_logger + +from fcclip import add_maskformer2_config, add_fcclip_config, add_mask_adapter_config +from predictor import VisualizationDemo + + +# constants +WINDOW_NAME = "mask-adapter demo" + + +def setup_cfg(args): + # load config from file and command-line arguments + cfg = get_cfg() + add_deeplab_config(cfg) + add_maskformer2_config(cfg) + add_fcclip_config(cfg) + add_mask_adapter_config(cfg) + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + cfg.freeze() + return cfg + + +def get_parser(): + parser = argparse.ArgumentParser(description="mask-adapter demo for builtin configs") + parser.add_argument( + "--config-file", + default="configs/mixed-mask-training/fc-clip/fcclip/fcclip_convnext_large_eval_ade20k.yaml", + metavar="FILE", + help="path to config file", + ) + parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.") + parser.add_argument("--video-input", help="Path to video file.") + parser.add_argument( + "--input", + nargs="+", + help="A list of space separated input images; " + "or a single glob pattern such as 'directory/*.jpg'", + ) + parser.add_argument( + "--output", + help="A file or directory to save output visualizations. " + "If not given, will show output in an OpenCV window.", + ) + + parser.add_argument( + "--confidence-threshold", + type=float, + default=0.5, + help="Minimum score for instance predictions to be shown", + ) + parser.add_argument( + "--opts", + help="Modify config options using the command-line 'KEY VALUE' pairs", + default=[], + nargs=argparse.REMAINDER, + ) + return parser + + +def test_opencv_video_format(codec, file_ext): + with tempfile.TemporaryDirectory(prefix="video_format_test") as dir: + filename = os.path.join(dir, "test_file" + file_ext) + writer = cv2.VideoWriter( + filename=filename, + fourcc=cv2.VideoWriter_fourcc(*codec), + fps=float(30), + frameSize=(10, 10), + isColor=True, + ) + [writer.write(np.zeros((10, 10, 3), np.uint8)) for _ in range(30)] + writer.release() + if os.path.isfile(filename): + return True + return False + + +if __name__ == "__main__": + mp.set_start_method("spawn", force=True) + args = get_parser().parse_args() + setup_logger(name="fvcore") + logger = setup_logger() + logger.info("Arguments: " + str(args)) + + cfg = setup_cfg(args) + + demo = VisualizationDemo(cfg) + + if args.input: + if len(args.input) == 1: + args.input = glob.glob(os.path.expanduser(args.input[0])) + assert args.input, "The input path(s) was not found" + for path in tqdm.tqdm(args.input, disable=not args.output): + # use PIL, to be consistent with evaluation + img = read_image(path, format="BGR") + start_time = time.time() + predictions, visualized_output = demo.run_on_image(img) + logger.info( + "{}: {} in {:.2f}s".format( + path, + "detected {} instances".format(len(predictions["instances"])) + if "instances" in predictions + else "finished", + time.time() - start_time, + ) + ) + + if args.output: + if os.path.isdir(args.output): + assert 
os.path.isdir(args.output), args.output + out_filename = os.path.join(args.output, os.path.basename(path)) + else: + assert len(args.input) == 1, "Please specify a directory with args.output" + out_filename = args.output + visualized_output.save(out_filename) + else: + cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) + cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1]) + if cv2.waitKey(0) == 27: + break # esc to quit + elif args.webcam: + assert args.input is None, "Cannot have both --input and --webcam!" + assert args.output is None, "output not yet supported with --webcam!" + cam = cv2.VideoCapture(0) + for vis in tqdm.tqdm(demo.run_on_video(cam)): + cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) + cv2.imshow(WINDOW_NAME, vis) + if cv2.waitKey(1) == 27: + break # esc to quit + cam.release() + cv2.destroyAllWindows() + elif args.video_input: + video = cv2.VideoCapture(args.video_input) + width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) + frames_per_second = video.get(cv2.CAP_PROP_FPS) + num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) + basename = os.path.basename(args.video_input) + codec, file_ext = ( + ("x264", ".mkv") if test_opencv_video_format("x264", ".mkv") else ("mp4v", ".mp4") + ) + if codec == ".mp4v": + warnings.warn("x264 codec not available, switching to mp4v") + if args.output: + if os.path.isdir(args.output): + output_fname = os.path.join(args.output, basename) + output_fname = os.path.splitext(output_fname)[0] + file_ext + else: + output_fname = args.output + assert not os.path.isfile(output_fname), output_fname + output_file = cv2.VideoWriter( + filename=output_fname, + # some installation of opencv may not support x264 (due to its license), + # you can try other format (e.g. 
MPEG) + fourcc=cv2.VideoWriter_fourcc(*codec), + fps=float(frames_per_second), + frameSize=(width, height), + isColor=True, + ) + assert os.path.isfile(args.video_input) + for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames): + if args.output: + output_file.write(vis_frame) + else: + cv2.namedWindow(basename, cv2.WINDOW_NORMAL) + cv2.imshow(basename, vis_frame) + if cv2.waitKey(1) == 27: + break # esc to quit + video.release() + if args.output: + output_file.release() + else: + cv2.destroyAllWindows() diff --git a/demo/images/000000000605.jpg b/demo/images/000000000605.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d3e4314159d7b936fcc2fd23e2ade02a70d6e97b Binary files /dev/null and b/demo/images/000000000605.jpg differ diff --git a/demo/images/000000001025.jpg b/demo/images/000000001025.jpg new file mode 100644 index 0000000000000000000000000000000000000000..0604f04d6460750dc93e0ef18b09fbacc0ae5a0d Binary files /dev/null and b/demo/images/000000001025.jpg differ diff --git a/demo/images/000000290833.jpg b/demo/images/000000290833.jpg new file mode 100644 index 0000000000000000000000000000000000000000..15e77931d90aec95ac9f3559da0142818bce3d1d Binary files /dev/null and b/demo/images/000000290833.jpg differ diff --git a/demo/images/ADE_val_00000739.jpg b/demo/images/ADE_val_00000739.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b759cdd0134155b66dfb5b6f6efa7faee53006c8 Binary files /dev/null and b/demo/images/ADE_val_00000739.jpg differ diff --git a/demo/images/ADE_val_00000979.jpg b/demo/images/ADE_val_00000979.jpg new file mode 100644 index 0000000000000000000000000000000000000000..fa9e29e5f3228f5d7a053399d50004b2a4049177 Binary files /dev/null and b/demo/images/ADE_val_00000979.jpg differ diff --git a/demo/images/ADE_val_00001200.jpg b/demo/images/ADE_val_00001200.jpg new file mode 100644 index 0000000000000000000000000000000000000000..910d9277a95eec22b73cf66cebd0d7099e5f0210 Binary files /dev/null and b/demo/images/ADE_val_00001200.jpg differ diff --git a/demo/predictor.py b/demo/predictor.py new file mode 100644 index 0000000000000000000000000000000000000000..48016d1ae44dffc1c7ed390c20b104031e1d2c7c --- /dev/null +++ b/demo/predictor.py @@ -0,0 +1,280 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/demo/predictor.py +""" + +import atexit +import bisect +import multiprocessing as mp +from collections import deque + +import cv2 +import torch +import itertools + + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.engine.defaults import DefaultPredictor as d2_defaultPredictor +from detectron2.utils.video_visualizer import VideoVisualizer +from detectron2.utils.visualizer import ColorMode, Visualizer, random_color +import detectron2.utils.visualizer as d2_visualizer + + +class DefaultPredictor(d2_defaultPredictor): + + def set_metadata(self, metadata): + self.model.set_metadata(metadata) + + +class OpenVocabVisualizer(Visualizer): + def draw_panoptic_seg(self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7): + """ + Draw panoptic prediction annotations or results. + + Args: + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each + segment. + segments_info (list[dict] or None): Describe each segment in `panoptic_seg`. 
+ If it is a ``list[dict]``, each dict contains keys "id", "category_id". + If None, category id of each pixel is computed by + ``pixel // metadata.label_divisor``. + area_threshold (int): stuff segments with less than `area_threshold` are not drawn. + + Returns: + output (VisImage): image object with visualizations. + """ + pred = d2_visualizer._PanopticPrediction(panoptic_seg, segments_info, self.metadata) + + if self._instance_mode == ColorMode.IMAGE_BW: + self.output.reset_image(self._create_grayscale_image(pred.non_empty_mask())) + # draw mask for all semantic segments first i.e. "stuff" + for mask, sinfo in pred.semantic_masks(): + category_idx = sinfo["category_id"] + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] + except AttributeError: + mask_color = None + + text = self.metadata.stuff_classes[category_idx].split(',')[0] + self.draw_binary_mask( + mask, + color=mask_color, + edge_color=d2_visualizer._OFF_WHITE, + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + # draw mask for all instances second + all_instances = list(pred.instance_masks()) + if len(all_instances) == 0: + return self.output + masks, sinfo = list(zip(*all_instances)) + category_ids = [x["category_id"] for x in sinfo] + + try: + scores = [x["score"] for x in sinfo] + except KeyError: + scores = None + stuff_classes = self.metadata.stuff_classes + stuff_classes = [x.split(',')[0] for x in stuff_classes] + labels = d2_visualizer._create_text_labels( + category_ids, scores, stuff_classes, [x.get("iscrowd", 0) for x in sinfo] + ) + + try: + colors = [ + self._jitter([x / 255 for x in self.metadata.stuff_colors[c]]) for c in category_ids + ] + except AttributeError: + colors = None + self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) + + return self.output + + +class VisualizationDemo(object): + def __init__(self, cfg, instance_mode=ColorMode.IMAGE, parallel=False): + """ + Args: + cfg (CfgNode): + instance_mode (ColorMode): + parallel (bool): whether to run the model in different processes from visualization. + Useful since the visualization logic can be slow. 
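+        The merged COCO + ADE20K + LVIS vocabulary is registered as "openvocab_dataset" and attached to the predictor's metadata.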
+ """ + + coco_metadata = MetadataCatalog.get("openvocab_coco_2017_val_panoptic_with_sem_seg") + ade20k_metadata = MetadataCatalog.get("openvocab_ade20k_panoptic_val") + lvis_classes = open("./fcclip/data/datasets/lvis_1203_with_prompt_eng.txt", 'r').read().splitlines() + lvis_classes = [x[x.find(':')+1:] for x in lvis_classes] + lvis_colors = list( + itertools.islice(itertools.cycle(coco_metadata.stuff_colors), len(lvis_classes)) + ) + # rerrange to thing_classes, stuff_classes + coco_thing_classes = coco_metadata.thing_classes + coco_stuff_classes = [x for x in coco_metadata.stuff_classes if x not in coco_thing_classes] + coco_thing_colors = coco_metadata.thing_colors + coco_stuff_colors = [x for x in coco_metadata.stuff_colors if x not in coco_thing_colors] + ade20k_thing_classes = ade20k_metadata.thing_classes + ade20k_stuff_classes = [x for x in ade20k_metadata.stuff_classes if x not in ade20k_thing_classes] + ade20k_thing_colors = ade20k_metadata.thing_colors + ade20k_stuff_colors = [x for x in ade20k_metadata.stuff_colors if x not in ade20k_thing_colors] + + user_classes = [] + user_colors = [random_color(rgb=True, maximum=1) for _ in range(len(user_classes))] + + stuff_classes = coco_stuff_classes + ade20k_stuff_classes + stuff_colors = coco_stuff_colors + ade20k_stuff_colors + thing_classes = user_classes + coco_thing_classes + ade20k_thing_classes + lvis_classes + thing_colors = user_colors + coco_thing_colors + ade20k_thing_colors + lvis_colors + + thing_dataset_id_to_contiguous_id = {x: x for x in range(len(thing_classes))} + DatasetCatalog.register( + "openvocab_dataset", lambda x: [] + ) + self.metadata = MetadataCatalog.get("openvocab_dataset").set( + stuff_classes=thing_classes+stuff_classes, + stuff_colors=thing_colors+stuff_colors, + thing_dataset_id_to_contiguous_id=thing_dataset_id_to_contiguous_id, + ) + #print("self.metadata:", self.metadata) + self.cpu_device = torch.device("cpu") + self.instance_mode = instance_mode + + self.parallel = parallel + if parallel: + num_gpu = torch.cuda.device_count() + self.predictor = AsyncPredictor(cfg, num_gpus=num_gpu) + else: + self.predictor = DefaultPredictor(cfg) + self.predictor.set_metadata(self.metadata) + + def run_on_image(self, image): + """ + Args: + image (np.ndarray): an image of shape (H, W, C) (in BGR order). + This is the format used by OpenCV. + Returns: + predictions (dict): the output of the model. + vis_output (VisImage): the visualized image output. + """ + vis_output = None + predictions = self.predictor(image) + # Convert image from OpenCV BGR format to Matplotlib RGB format. + image = image[:, :, ::-1] + visualizer = OpenVocabVisualizer(image, self.metadata, instance_mode=self.instance_mode) + if "panoptic_seg" in predictions: + panoptic_seg, segments_info = predictions["panoptic_seg"] + vis_output = visualizer.draw_panoptic_seg( + panoptic_seg.to(self.cpu_device), segments_info + ) + else: + if "sem_seg" in predictions: + vis_output = visualizer.draw_sem_seg( + predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) + ) + if "instances" in predictions: + instances = predictions["instances"].to(self.cpu_device) + vis_output = visualizer.draw_instance_predictions(predictions=instances) + + return predictions, vis_output + + def _frame_from_video(self, video): + while video.isOpened(): + success, frame = video.read() + if success: + yield frame + else: + break + + +class AsyncPredictor: + """ + A predictor that runs the model asynchronously, possibly on >1 GPUs. 
+ Because rendering the visualization takes considerably amount of time, + this helps improve throughput a little bit when rendering videos. + """ + + class _StopToken: + pass + + class _PredictWorker(mp.Process): + def __init__(self, cfg, task_queue, result_queue): + self.cfg = cfg + self.task_queue = task_queue + self.result_queue = result_queue + super().__init__() + + def run(self): + predictor = DefaultPredictor(self.cfg) + + while True: + task = self.task_queue.get() + if isinstance(task, AsyncPredictor._StopToken): + break + idx, data = task + result = predictor(data) + self.result_queue.put((idx, result)) + + def __init__(self, cfg, num_gpus: int = 1): + """ + Args: + cfg (CfgNode): + num_gpus (int): if 0, will run on CPU + """ + num_workers = max(num_gpus, 1) + self.task_queue = mp.Queue(maxsize=num_workers * 3) + self.result_queue = mp.Queue(maxsize=num_workers * 3) + self.procs = [] + for gpuid in range(max(num_gpus, 1)): + cfg = cfg.clone() + cfg.defrost() + cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu" + self.procs.append( + AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue) + ) + + self.put_idx = 0 + self.get_idx = 0 + self.result_rank = [] + self.result_data = [] + + for p in self.procs: + p.start() + atexit.register(self.shutdown) + + def put(self, image): + self.put_idx += 1 + self.task_queue.put((self.put_idx, image)) + + def get(self): + self.get_idx += 1 # the index needed for this request + if len(self.result_rank) and self.result_rank[0] == self.get_idx: + res = self.result_data[0] + del self.result_data[0], self.result_rank[0] + return res + + while True: + # make sure the results are returned in the correct order + idx, res = self.result_queue.get() + if idx == self.get_idx: + return res + insert = bisect.bisect(self.result_rank, idx) + self.result_rank.insert(insert, idx) + self.result_data.insert(insert, res) + + def __len__(self): + return self.put_idx - self.get_idx + + def __call__(self, image): + self.put(image) + return self.get() + + def shutdown(self): + for _ in self.procs: + self.task_queue.put(AsyncPredictor._StopToken()) + + @property + def default_buffer_size(self): + return len(self.procs) * 5 \ No newline at end of file diff --git a/mask_adapter/.DS_Store b/mask_adapter/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..23d90bd83fecfc358450f7d9b9457ff2ebcc3d3b Binary files /dev/null and b/mask_adapter/.DS_Store differ diff --git a/mask_adapter/__init__.py b/mask_adapter/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..27ba9568cd683c7317227aadb9067f431fcb32ed --- /dev/null +++ b/mask_adapter/__init__.py @@ -0,0 +1,44 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from . import data # register all new datasets +from . 
import modeling + +# config +from .config import add_maskformer2_config, add_fcclip_config, add_mask_adapter_config + +# dataset loading +from .data.dataset_mappers.coco_instance_new_baseline_dataset_mapper import COCOInstanceNewBaselineDatasetMapper +from .data.dataset_mappers.coco_panoptic_new_baseline_dataset_mapper import COCOPanopticNewBaselineDatasetMapper +#from .data.dataset_mappers.grand_new_baseline_dataset_mapper import GrandNewBaselineDatasetMapper +from .data.dataset_mappers.mask_former_instance_dataset_mapper import ( + MaskFormerInstanceDatasetMapper, +) +from .data.dataset_mappers.mask_former_panoptic_dataset_mapper import ( + MaskFormerPanopticDatasetMapper, +) +from .data.dataset_mappers.mask_former_semantic_dataset_mapper import ( + MaskFormerSemanticDatasetMapper, +) +from .data.dataset_mappers.coco_combine_new_baseline_dataset_mapper import ( + COCOCombineNewBaselineDatasetMapper, +) +from .data.custom_dataset_dataloader import * +# models +from .mask_adapter import MASK_Adapter +from .test_time_augmentation import SemanticSegmentorWithTTA + +# evaluation +from .evaluation.instance_evaluation import InstanceSegEvaluator diff --git a/mask_adapter/__pycache__/__init__.cpython-310.pyc b/mask_adapter/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a68383bfbcd6cd6ef148a72c4ffaadbb0e7095ba Binary files /dev/null and b/mask_adapter/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/__pycache__/__init__.cpython-38.pyc b/mask_adapter/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6467faa4903a6be0f0b0dd97998faa2d129c8afa Binary files /dev/null and b/mask_adapter/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/__pycache__/config.cpython-310.pyc b/mask_adapter/__pycache__/config.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54510402cf668701ba6d50fe83c154e69c884ee7 Binary files /dev/null and b/mask_adapter/__pycache__/config.cpython-310.pyc differ diff --git a/mask_adapter/__pycache__/config.cpython-38.pyc b/mask_adapter/__pycache__/config.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dfa297e4d91a477058abdd30fe0b1dc98ef37c98 Binary files /dev/null and b/mask_adapter/__pycache__/config.cpython-38.pyc differ diff --git a/mask_adapter/__pycache__/fcclip.cpython-310.pyc b/mask_adapter/__pycache__/fcclip.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..def71d6f94aead039499a80cf72d9f7cb89b0f2d Binary files /dev/null and b/mask_adapter/__pycache__/fcclip.cpython-310.pyc differ diff --git a/mask_adapter/__pycache__/fcclip.cpython-38.pyc b/mask_adapter/__pycache__/fcclip.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4051d08d54cbec8ec3c127ab22550498940548df Binary files /dev/null and b/mask_adapter/__pycache__/fcclip.cpython-38.pyc differ diff --git a/mask_adapter/__pycache__/mask_adapter.cpython-310.pyc b/mask_adapter/__pycache__/mask_adapter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..258861281f394c58c5c083e84e07ca54376e71d4 Binary files /dev/null and b/mask_adapter/__pycache__/mask_adapter.cpython-310.pyc differ diff --git a/mask_adapter/__pycache__/mask_adapter.cpython-38.pyc b/mask_adapter/__pycache__/mask_adapter.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..86def20808948a92baea923edd2e6d7a48a704e4 Binary 
files /dev/null and b/mask_adapter/__pycache__/mask_adapter.cpython-38.pyc differ diff --git a/mask_adapter/__pycache__/sam_maskadapter.cpython-310.pyc b/mask_adapter/__pycache__/sam_maskadapter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9b0f8973415b4a1b638a55df3ca52ff628f5878f Binary files /dev/null and b/mask_adapter/__pycache__/sam_maskadapter.cpython-310.pyc differ diff --git a/mask_adapter/__pycache__/test_time_augmentation.cpython-310.pyc b/mask_adapter/__pycache__/test_time_augmentation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6628d22176ad796cf65018d0c93982ff08f80fb Binary files /dev/null and b/mask_adapter/__pycache__/test_time_augmentation.cpython-310.pyc differ diff --git a/mask_adapter/__pycache__/test_time_augmentation.cpython-38.pyc b/mask_adapter/__pycache__/test_time_augmentation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..de4d1c21df11d71bc2615c6e920ad8034f32c2c0 Binary files /dev/null and b/mask_adapter/__pycache__/test_time_augmentation.cpython-38.pyc differ diff --git a/mask_adapter/config.py b/mask_adapter/config.py new file mode 100644 index 0000000000000000000000000000000000000000..61e2a19116efa589bff31f5708618a7c3b4aaa49 --- /dev/null +++ b/mask_adapter/config.py @@ -0,0 +1,150 @@ +# -*- coding: utf-8 -*- +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/config.py +""" +from detectron2.config import CfgNode as CN + + +def add_maskformer2_config(cfg): + """ + Add config for MASK_FORMER. + """ + # NOTE: configs from original maskformer + # data config + # select the dataset mapper + cfg.INPUT.DATASET_MAPPER_NAME = "mask_former_semantic" + # Color augmentation + cfg.INPUT.COLOR_AUG_SSD = False + # We retry random cropping until no single category in semantic segmentation GT occupies more + # than `SINGLE_CATEGORY_MAX_AREA` part of the crop. + cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA = 1.0 + # Pad image and segmentation GT in dataset mapper. 
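+    # -1 disables padding in the mapper; batched inputs are padded by the model via MODEL.MASK_FORMER.SIZE_DIVISIBILITY instead.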
+ cfg.INPUT.SIZE_DIVISIBILITY = -1 + + # solver config + # weight decay on embedding + cfg.SOLVER.WEIGHT_DECAY_EMBED = 0.0 + # optimizer + cfg.SOLVER.OPTIMIZER = "ADAMW" + cfg.SOLVER.BACKBONE_MULTIPLIER = 0.1 + + # mask_former model config + cfg.MODEL.MASK_FORMER = CN() + + # loss + cfg.MODEL.MASK_FORMER.DEEP_SUPERVISION = True + cfg.MODEL.MASK_FORMER.NO_OBJECT_WEIGHT = 0.1 + cfg.MODEL.MASK_FORMER.CLASS_WEIGHT = 1.0 + cfg.MODEL.MASK_FORMER.DICE_WEIGHT = 1.0 + cfg.MODEL.MASK_FORMER.MASK_WEIGHT = 20.0 + + # transformer config + cfg.MODEL.MASK_FORMER.NHEADS = 8 + cfg.MODEL.MASK_FORMER.DROPOUT = 0.1 + cfg.MODEL.MASK_FORMER.DIM_FEEDFORWARD = 2048 + cfg.MODEL.MASK_FORMER.ENC_LAYERS = 0 + cfg.MODEL.MASK_FORMER.DEC_LAYERS = 6 + cfg.MODEL.MASK_FORMER.PRE_NORM = False + + cfg.MODEL.MASK_FORMER.HIDDEN_DIM = 256 + cfg.MODEL.MASK_FORMER.NUM_OBJECT_QUERIES = 100 + + cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE = "res5" + cfg.MODEL.MASK_FORMER.ENFORCE_INPUT_PROJ = False + + # mask_former inference config + cfg.MODEL.MASK_FORMER.TEST = CN() + cfg.MODEL.MASK_FORMER.TEST.SEMANTIC_ON = True + cfg.MODEL.MASK_FORMER.TEST.INSTANCE_ON = False + cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON = False + cfg.MODEL.MASK_FORMER.TEST.OBJECT_MASK_THRESHOLD = 0.0 + cfg.MODEL.MASK_FORMER.TEST.OVERLAP_THRESHOLD = 0.0 + cfg.MODEL.MASK_FORMER.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE = False + + # Sometimes `backbone.size_divisibility` is set to 0 for some backbone (e.g. ResNet) + # you can use this config to override + cfg.MODEL.MASK_FORMER.SIZE_DIVISIBILITY = 32 + + # pixel decoder config + cfg.MODEL.SEM_SEG_HEAD.MASK_DIM = 256 + # adding transformer in pixel decoder + cfg.MODEL.SEM_SEG_HEAD.TRANSFORMER_ENC_LAYERS = 0 + # pixel decoder + cfg.MODEL.SEM_SEG_HEAD.PIXEL_DECODER_NAME = "BasePixelDecoder" + + # swin transformer backbone + cfg.MODEL.SWIN = CN() + cfg.MODEL.SWIN.PRETRAIN_IMG_SIZE = 224 + cfg.MODEL.SWIN.PATCH_SIZE = 4 + cfg.MODEL.SWIN.EMBED_DIM = 96 + cfg.MODEL.SWIN.DEPTHS = [2, 2, 6, 2] + cfg.MODEL.SWIN.NUM_HEADS = [3, 6, 12, 24] + cfg.MODEL.SWIN.WINDOW_SIZE = 7 + cfg.MODEL.SWIN.MLP_RATIO = 4.0 + cfg.MODEL.SWIN.QKV_BIAS = True + cfg.MODEL.SWIN.QK_SCALE = None + cfg.MODEL.SWIN.DROP_RATE = 0.0 + cfg.MODEL.SWIN.ATTN_DROP_RATE = 0.0 + cfg.MODEL.SWIN.DROP_PATH_RATE = 0.3 + cfg.MODEL.SWIN.APE = False + cfg.MODEL.SWIN.PATCH_NORM = True + cfg.MODEL.SWIN.OUT_FEATURES = ["res2", "res3", "res4", "res5"] + cfg.MODEL.SWIN.USE_CHECKPOINT = False + + # NOTE: maskformer2 extra configs + # transformer module + cfg.MODEL.MASK_FORMER.TRANSFORMER_DECODER_NAME = "MultiScaleMaskedTransformerDecoder" + + # LSJ aug + cfg.INPUT.IMAGE_SIZE = 1024 + cfg.INPUT.MIN_SCALE = 0.1 + cfg.INPUT.MAX_SCALE = 2.0 + + # MSDeformAttn encoder configs + cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES = ["res3", "res4", "res5"] + cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS = 4 + cfg.MODEL.SEM_SEG_HEAD.DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS = 8 + + # point loss configs + # Number of points sampled during training for a mask point head. + cfg.MODEL.MASK_FORMER.TRAIN_NUM_POINTS = 112 * 112 + # Oversampling parameter for PointRend point sampling during training. Parameter `k` in the + # original paper. + cfg.MODEL.MASK_FORMER.OVERSAMPLE_RATIO = 3.0 + # Importance sampling parameter for PointRend point sampling during training. Parametr `beta` in + # the original paper. 
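+    # i.e. 75% of TRAIN_NUM_POINTS are taken from the most uncertain locations, the remaining 25% uniformly at random.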
+ cfg.MODEL.MASK_FORMER.IMPORTANCE_SAMPLE_RATIO = 0.75 + + +def add_fcclip_config(cfg): + # FC-CLIP model config + cfg.MODEL.FC_CLIP = CN() + cfg.MODEL.FC_CLIP.CLIP_MODEL_NAME = "convnext_large_d_320" + cfg.MODEL.FC_CLIP.CLIP_PRETRAINED_WEIGHTS = "laion2b_s29b_b131k_ft_soup" + cfg.MODEL.FC_CLIP.EMBED_DIM = 768 + cfg.MODEL.FC_CLIP.GEOMETRIC_ENSEMBLE_ALPHA = 0.4 + cfg.MODEL.FC_CLIP.GEOMETRIC_ENSEMBLE_BETA = 0.8 + cfg.MODEL.FC_CLIP.ENSEMBLE_ON_VALID_MASK = False + +def add_mask_adapter_config(cfg): + # Mask-Adapter model config + cfg.MODEL.MASK_ADAPTER = CN() + cfg.MODEL.MASK_ADAPTER.MASK_IN_CHANNELS = 16 + cfg.MODEL.MASK_ADAPTER.NUM_CHANNELS = 768 + cfg.MODEL.MASK_ADAPTER.USE_CHECKPOINT = False + cfg.MODEL.MASK_ADAPTER.NUM_OUTPUT_MAPS = 16 + + cfg.MODEL.MASK_ADAPTER.MASK_THRESHOLD = 0.45 + cfg.MODEL.MASK_ADAPTER.TRAIN_MAFT = False + + cfg.MODEL.MASK_ADAPTER.NAME = "MASKAdapterHead" + + cfg.DATALOADER.DATASET_RATIO = [1, 1] + cfg.DATALOADER.USE_DIFF_BS_SIZE = True + cfg.DATALOADER.DATASET_BS = [2, 2] + cfg.DATALOADER.USE_RFS = [False, False] + cfg.DATALOADER.MULTI_DATASET_GROUPING = True + cfg.DATALOADER.DATASET_ANN = ['box', 'box'] \ No newline at end of file diff --git a/mask_adapter/data/.DS_Store b/mask_adapter/data/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..e829afd79ec3201bfb6e7e9a3053eb81f794d5de Binary files /dev/null and b/mask_adapter/data/.DS_Store differ diff --git a/mask_adapter/data/__init__.py b/mask_adapter/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..54e2ec93d0494174518bc86d5769cea98fab0ea7 --- /dev/null +++ b/mask_adapter/data/__init__.py @@ -0,0 +1,16 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from . 
import datasets diff --git a/mask_adapter/data/__pycache__/__init__.cpython-310.pyc b/mask_adapter/data/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5815d02b3f2443bbda74be2f0f61055f35e1e397 Binary files /dev/null and b/mask_adapter/data/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/data/__pycache__/__init__.cpython-38.pyc b/mask_adapter/data/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7668cd5f7aa13751de97807d432fa5b034dcb60b Binary files /dev/null and b/mask_adapter/data/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/data/__pycache__/custom_dataset_dataloader.cpython-310.pyc b/mask_adapter/data/__pycache__/custom_dataset_dataloader.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..164b526b9d98149f384ceb44a24826042ebf9626 Binary files /dev/null and b/mask_adapter/data/__pycache__/custom_dataset_dataloader.cpython-310.pyc differ diff --git a/mask_adapter/data/__pycache__/custom_dataset_dataloader.cpython-38.pyc b/mask_adapter/data/__pycache__/custom_dataset_dataloader.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8fd0068d877e3af2db6fb677a32d80b8024a1d2a Binary files /dev/null and b/mask_adapter/data/__pycache__/custom_dataset_dataloader.cpython-38.pyc differ diff --git a/mask_adapter/data/custom_dataset_dataloader.py b/mask_adapter/data/custom_dataset_dataloader.py new file mode 100644 index 0000000000000000000000000000000000000000..38b6e7b060f4dff4c801d6b27845c0ae1529cc26 --- /dev/null +++ b/mask_adapter/data/custom_dataset_dataloader.py @@ -0,0 +1,331 @@ +# Copyright (c) 2024 ByteDance. All Rights Reserved. +# Part of the code is from https://github.com/xingyizhou/UniDet/blob/master/projects/UniDet/unidet/data/multi_dataset_dataloader.py (Apache-2.0 License) +import copy +import logging +import numpy as np +import operator +import torch +import torch.utils.data +import json +from detectron2.utils.comm import get_world_size +from detectron2.utils.logger import _log_api_usage, log_first_n + +from detectron2.config import configurable +from detectron2.data import samplers +from torch.utils.data.sampler import BatchSampler, Sampler +from detectron2.data.common import DatasetFromList, MapDataset +from detectron2.data.dataset_mapper import DatasetMapper +from detectron2.data.build import get_detection_dataset_dicts, build_batch_data_loader +from detectron2.data.samplers import TrainingSampler, RepeatFactorTrainingSampler +from detectron2.data.build import worker_init_reset_seed, print_instances_class_histogram +from detectron2.data.build import filter_images_with_only_crowd_annotations +from detectron2.data.build import filter_images_with_few_keypoints +from detectron2.data.build import check_metadata_consistency +from detectron2.data.catalog import MetadataCatalog, DatasetCatalog +from detectron2.utils import comm +import itertools +import math +from collections import defaultdict +from typing import Optional + + +def _custom_train_loader_from_config(cfg, mapper=None, *, dataset=None, sampler=None): + sampler_name = cfg.DATALOADER.SAMPLER_TRAIN # "MultiDatasetSampler" + if 'MultiDataset' in sampler_name: # True + dataset_dicts = get_detection_dataset_dicts_with_source( + cfg.DATASETS.TRAIN, + filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, + min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE + if cfg.MODEL.KEYPOINT_ON else 0, + 
proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, + ) + else: # False + dataset_dicts = get_detection_dataset_dicts( + cfg.DATASETS.TRAIN, + filter_empty=cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS, + min_keypoints=cfg.MODEL.ROI_KEYPOINT_HEAD.MIN_KEYPOINTS_PER_IMAGE + if cfg.MODEL.KEYPOINT_ON else 0, + proposal_files=cfg.DATASETS.PROPOSAL_FILES_TRAIN if cfg.MODEL.LOAD_PROPOSALS else None, + ) + + if mapper is None: # False + mapper = DatasetMapper(cfg, True) + + if sampler is not None: + pass + elif sampler_name == "TrainingSampler": # False + sampler = TrainingSampler(len(dataset)) + elif sampler_name == "MultiDatasetSampler": # True + sampler = MultiDatasetSampler( + dataset_dicts, + dataset_ratio = cfg.DATALOADER.DATASET_RATIO, + use_rfs = cfg.DATALOADER.USE_RFS, + dataset_ann = cfg.DATALOADER.DATASET_ANN, + repeat_threshold = cfg.DATALOADER.REPEAT_THRESHOLD, + ) + elif sampler_name == "RepeatFactorTrainingSampler": # False + repeat_factors = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency( + dataset_dicts, cfg.DATALOADER.REPEAT_THRESHOLD + ) + sampler = RepeatFactorTrainingSampler(repeat_factors) + else: + raise ValueError("Unknown training sampler: {}".format(sampler_name)) + + return { + "dataset": dataset_dicts, + "sampler": sampler, + "mapper": mapper, + "total_batch_size": cfg.SOLVER.IMS_PER_BATCH, # 64 + "aspect_ratio_grouping": cfg.DATALOADER.ASPECT_RATIO_GROUPING, + "num_workers": cfg.DATALOADER.NUM_WORKERS, # 8 + 'multi_dataset_grouping': cfg.DATALOADER.MULTI_DATASET_GROUPING, # True + 'use_diff_bs_size': cfg.DATALOADER.USE_DIFF_BS_SIZE, # True + 'dataset_bs': cfg.DATALOADER.DATASET_BS, # [8, 32] + 'num_datasets': len(cfg.DATASETS.TRAIN) # 2 + } + + +@configurable(from_config=_custom_train_loader_from_config) +def build_custom_train_loader( + dataset, *, mapper, sampler, + total_batch_size=16, # 64 + aspect_ratio_grouping=True, + num_workers=0, # 8 + num_datasets=1, # 2 + multi_dataset_grouping=False, # True + use_diff_bs_size=False, # True + dataset_bs=[] # [8, 32] + ): + """ + Modified from detectron2.data.build.build_custom_train_loader, but supports + different samplers + """ + if isinstance(dataset, list): + dataset = DatasetFromList(dataset, copy=False) + if mapper is not None: # True + dataset = MapDataset(dataset, mapper) + if sampler is None: # False + sampler = TrainingSampler(len(dataset)) + assert isinstance(sampler, torch.utils.data.sampler.Sampler) + if multi_dataset_grouping: # True + return build_multi_dataset_batch_data_loader( + use_diff_bs_size, + dataset_bs, + dataset, + sampler, + total_batch_size, + num_datasets=num_datasets, + num_workers=num_workers, + ) + else: # False + return build_batch_data_loader( + dataset, + sampler, + total_batch_size, + aspect_ratio_grouping=aspect_ratio_grouping, + num_workers=num_workers, + ) + + +def build_multi_dataset_batch_data_loader( + use_diff_bs_size, dataset_bs, + dataset, sampler, total_batch_size, num_datasets, num_workers=0 +): + """ + """ + world_size = get_world_size() + assert ( + total_batch_size > 0 and total_batch_size % world_size == 0 + ), "Total batch size ({}) must be divisible by the number of gpus ({}).".format( + total_batch_size, world_size + ) + + batch_size = total_batch_size // world_size + data_loader = torch.utils.data.DataLoader( + dataset, + sampler=sampler, + num_workers=num_workers, + batch_sampler=None, + collate_fn=operator.itemgetter(0), # don't batch, but yield individual elements + worker_init_fn=worker_init_reset_seed, + ) # yield 
individual mapped dict + if use_diff_bs_size: + return DIFFMDAspectRatioGroupedDataset( + data_loader, dataset_bs, num_datasets) + else: + return MDAspectRatioGroupedDataset( + data_loader, batch_size, num_datasets) + + +def get_detection_dataset_dicts_with_source( + dataset_names, filter_empty=True, min_keypoints=0, proposal_files=None +): + assert len(dataset_names) + dataset_dicts = [DatasetCatalog.get(dataset_name) for dataset_name in dataset_names] + for dataset_name, dicts in zip(dataset_names, dataset_dicts): + assert len(dicts), "Dataset '{}' is empty!".format(dataset_name) + + for source_id, (dataset_name, dicts) in \ + enumerate(zip(dataset_names, dataset_dicts)): + assert len(dicts), "Dataset '{}' is empty!".format(dataset_name) + for d in dicts: + d['dataset_source'] = source_id # add "dataset_source" to original dict + + if "annotations" in dicts[0]: + try: + class_names = MetadataCatalog.get(dataset_name).thing_classes + check_metadata_consistency("thing_classes", dataset_name) + print_instances_class_histogram(dicts, class_names) + except AttributeError: # class names are not available for this dataset + pass + + assert proposal_files is None + + dataset_dicts = list(itertools.chain.from_iterable(dataset_dicts)) # connect multiple iterable objects to one + + has_instances = "annotations" in dataset_dicts[0] + if filter_empty and has_instances: + dataset_dicts = filter_images_with_only_crowd_annotations(dataset_dicts) + if min_keypoints > 0 and has_instances: + dataset_dicts = filter_images_with_few_keypoints(dataset_dicts, min_keypoints) + + return dataset_dicts + + +class MultiDatasetSampler(Sampler): + def __init__( + self, + dataset_dicts, + dataset_ratio, + use_rfs, # [True, False] + dataset_ann, + repeat_threshold=0.001, + seed: Optional[int] = None, + ): + """ + """ + sizes = [0 for _ in range(len(dataset_ratio))] + for d in dataset_dicts: + sizes[d['dataset_source']] += 1 # size of each dataset + print('dataset sizes', sizes) + self.sizes = sizes + assert len(dataset_ratio) == len(sizes), \ + 'length of dataset ratio {} should be equal to number if dataset {}'.format( + len(dataset_ratio), len(sizes) + ) + if seed is None: + seed = comm.shared_random_seed() # seed shared across all GPUs + self._seed = int(seed) + self._rank = comm.get_rank() + self._world_size = comm.get_world_size() + + self.dataset_ids = torch.tensor( + [d['dataset_source'] for d in dataset_dicts], dtype=torch.long) + + dataset_weight = [torch.ones(s) * max(sizes) / s * r / sum(dataset_ratio) \ + for i, (r, s) in enumerate(zip(dataset_ratio, sizes))] + dataset_weight = torch.cat(dataset_weight) + + rfs_factors = [] + st = 0 + for i, s in enumerate(sizes): + if use_rfs[i]: + if dataset_ann[i] == 'box': + rfs_func = RepeatFactorTrainingSampler.repeat_factors_from_category_frequency + else: + rfs_func = repeat_factors_from_tag_frequency + rfs_factor = rfs_func( + dataset_dicts[st: st + s], + repeat_thresh=repeat_threshold) + rfs_factor = rfs_factor * (s / rfs_factor.sum()) + else: + rfs_factor = torch.ones(s) + rfs_factors.append(rfs_factor) + st = st + s + rfs_factors = torch.cat(rfs_factors) + + self.weights = dataset_weight * rfs_factors # weights for each element in the dataset_dict + self.sample_epoch_size = len(self.weights) + + def __iter__(self): + start = self._rank + yield from itertools.islice( + self._infinite_indices(), start, None, self._world_size) # itertools.islice(iterable, start, stop[, step]) + + + def _infinite_indices(self): + g = torch.Generator() + g.manual_seed(self._seed) + 
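+ # Each pass draws `sample_epoch_size` indices with replacement, weighted by
+ # self.weights (dataset-ratio balancing multiplied by the per-image repeat
+ # factors), so smaller or rarer datasets are over-sampled; __iter__ then strides
+ # this stream by rank / world_size for distributed training.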
while True: + ids = torch.multinomial( + self.weights, self.sample_epoch_size, generator=g, + replacement=True) # randomly sample according to the given weights + nums = [(self.dataset_ids[ids] == i).sum().int().item() \ + for i in range(len(self.sizes))] + yield from ids + + +class MDAspectRatioGroupedDataset(torch.utils.data.IterableDataset): + def __init__(self, dataset, batch_size, num_datasets): + """ + """ + self.dataset = dataset + self.batch_size = batch_size + self._buckets = [[] for _ in range(2 * num_datasets)] # there are (2 x num_datasets) types of data. For each dataset, there are two types: w>h or w<=h + + def __iter__(self): + for d in self.dataset: + w, h = d["width"], d["height"] + aspect_ratio_bucket_id = 0 if w > h else 1 + bucket_id = d['dataset_source'] * 2 + aspect_ratio_bucket_id + bucket = self._buckets[bucket_id] + bucket.append(d) + if len(bucket) == self.batch_size: + yield bucket[:] + del bucket[:] + + +class DIFFMDAspectRatioGroupedDataset(torch.utils.data.IterableDataset): + def __init__(self, dataset, batch_sizes, num_datasets): + """ + """ + self.dataset = dataset + self.batch_sizes = batch_sizes + self._buckets = [[] for _ in range(2 * num_datasets)] + + def __iter__(self): + for d in self.dataset: + w, h = d["width"], d["height"] + aspect_ratio_bucket_id = 0 if w > h else 1 + bucket_id = d['dataset_source'] * 2 + aspect_ratio_bucket_id + bucket = self._buckets[bucket_id] + bucket.append(d) + if len(bucket) == self.batch_sizes[d['dataset_source']]: # allow different batchsizes + yield bucket[:] + del bucket[:] + + +def repeat_factors_from_tag_frequency(dataset_dicts, repeat_thresh): + """ + """ + category_freq = defaultdict(int) + for dataset_dict in dataset_dicts: + cat_ids = dataset_dict['pos_category_ids'] + for cat_id in cat_ids: + category_freq[cat_id] += 1 + num_images = len(dataset_dicts) + for k, v in category_freq.items(): + category_freq[k] = v / num_images + + category_rep = { + cat_id: max(1.0, math.sqrt(repeat_thresh / cat_freq)) + for cat_id, cat_freq in category_freq.items() + } + + rep_factors = [] + for dataset_dict in dataset_dicts: + cat_ids = dataset_dict['pos_category_ids'] + rep_factor = max({category_rep[cat_id] for cat_id in cat_ids}, default=1.0) + rep_factors.append(rep_factor) + + return torch.tensor(rep_factors, dtype=torch.float32) \ No newline at end of file diff --git a/mask_adapter/data/dataset_mappers/__init__.py b/mask_adapter/data/dataset_mappers/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be75f0cd9568f901b3174ecfb43c0b9f4fa1f77d --- /dev/null +++ b/mask_adapter/data/dataset_mappers/__init__.py @@ -0,0 +1,15 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" \ No newline at end of file diff --git a/mask_adapter/data/dataset_mappers/__pycache__/__init__.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d54179f1819d7559f03936ad60f3219494fcb338 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/__init__.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c1c994f03aa9bcc090b9bdc75572b100ce5cb665 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/coco_combine_new_baseline_dataset_mapper.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/coco_combine_new_baseline_dataset_mapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f7b17c8d22cc19161c434afdd8b8c8c4833b357e Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/coco_combine_new_baseline_dataset_mapper.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/coco_combine_new_baseline_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/coco_combine_new_baseline_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..640b5d968f097bcb6832b990fc867d3a78f8b855 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/coco_combine_new_baseline_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54077cec075535f2b14ec46eee15782d5149cf67 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5c00231b0f41e69b45dab1723618ca5a0d44ff88 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/coco_instance_new_baseline_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..27ff8c7a33ebdc431d820c32f5634eb1f0821c3a Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..23d5f0d516bebe6b877b6b33d1326d2b1d961728 Binary files /dev/null and 
b/mask_adapter/data/dataset_mappers/__pycache__/coco_panoptic_new_baseline_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/grand_new_baseline_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/grand_new_baseline_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34bb7029396c68226e2a4b8b684a1eb76ba93623 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/grand_new_baseline_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..305947bad044a823a0afeead17cb24ce2c61c93e Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c62df5711c56928ccc5a95d1ac5c8d232e5d2a99 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_instance_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8b072b17d37d3e9318e29009ca71cb6fd51c98c9 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..31a82d2e1c280547690a62ab81247d78d1fc9ddf Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_panoptic_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-310.pyc b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..80aed9d2102ec75e6e3e90cbbac05ac39e81e357 Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-310.pyc differ diff --git a/mask_adapter/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-38.pyc b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa903478f69e45811b313a6812c0323dbe90e7eb Binary files /dev/null and b/mask_adapter/data/dataset_mappers/__pycache__/mask_former_semantic_dataset_mapper.cpython-38.pyc differ diff --git a/mask_adapter/data/dataset_mappers/coco_combine_new_baseline_dataset_mapper.py b/mask_adapter/data/dataset_mappers/coco_combine_new_baseline_dataset_mapper.py new file mode 100644 index 
0000000000000000000000000000000000000000..9870117a8aa3119385635d8b0a91e5d9a549845b --- /dev/null +++ b/mask_adapter/data/dataset_mappers/coco_combine_new_baseline_dataset_mapper.py @@ -0,0 +1,237 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/mask_former_instance_dataset_mapper.py +""" + +import copy +import logging + +import numpy as np +import pycocotools.mask as mask_util +import torch +from torch.nn import functional as F +import time +from detectron2.config import configurable +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.projects.point_rend import ColorAugSSDTransform +from detectron2.structures import BitMasks, Instances, polygons_to_bitmask, BoxMode,Boxes +from PIL import Image + +__all__ = ["COCOCombineNewBaselineDatasetMapper"] + + +class COCOCombineNewBaselineDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by MaskFormer for instance segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + augmentations, + image_format, + size_divisibility, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. + size_divisibility: pad image size to be divisible by this value + """ + self.is_train = is_train + self.tfm_gens = augmentations + self.img_format = image_format + self.size_divisibility = size_divisibility + + + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[{self.__class__.__name__}] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train=True): + # Build augmentation + image_size = cfg.INPUT.IMAGE_SIZE + min_scale = cfg.INPUT.MIN_SCALE + max_scale = cfg.INPUT.MAX_SCALE + + augmentation = [] + + if cfg.INPUT.RANDOM_FLIP != "none": + augmentation.append( + T.RandomFlip( + horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", + vertical=cfg.INPUT.RANDOM_FLIP == "vertical", + ) + ) + augmentation.extend([ + T.ResizeScale( + min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size + ), + T.FixedSizeCrop(crop_size=(image_size, image_size)), + ]) + + if cfg.INPUT.COLOR_AUG_SSD: + augmentation.append(ColorAugSSDTransform(img_format=cfg.INPUT.FORMAT)) + + ret = { + "is_train": is_train, + "augmentations": augmentation, + "image_format": cfg.INPUT.FORMAT, + "size_divisibility": cfg.INPUT.SIZE_DIVISIBILITY, + } + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 
+ + Returns: + dict: a format that builtin models in detectron2 accept + """ + + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + image_shape_before = image.shape[:2] + utils.check_image_size(dataset_dict, image) + + aug_input = T.AugInput(image) + aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input) + image = aug_input.image + image_shape = image.shape[:2] + + # transform instnace masks + #assert "annotations" in dataset_dict + if "annotations" in dataset_dict : + + file_name = dataset_dict["file_name"] + if "obj365" in file_name: + dataset_dict["dataname"] = "objects365_v1_masktrain" + elif "stuff" in file_name: + dataset_dict["dataname"] = "openvocab_coco_2017_train_stuff_sem_seg" + else: + dataset_dict["dataname"] = "lvis_v1_train" + + for anno in dataset_dict["annotations"]: + anno.pop("keypoints", None) + + annos = [ + utils.transform_instance_annotations(obj, transforms, image.shape[:2]) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + + if len(annos): + assert "segmentation" in annos[0] + segms = [obj["segmentation"] for obj in annos] + masks = [] + for segm in segms: + + if isinstance(segm, list): + # polygon + masks.append(polygons_to_bitmask(segm, *image.shape[:2])) + elif isinstance(segm, dict): + # COCO RLE + masks.append(mask_util.decode(segm)) + elif isinstance(segm, np.ndarray): + assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( + segm.ndim + ) + assert dataset_dict["dataname"] == "objects365_v1_masktrain" + # mask array + masks.append(segm) + else: + raise ValueError( + "Cannot convert segmentation of type '{}' to BitMasks!" + "Supported types are: polygons as list[list[float] or ndarray]," + " COCO-style RLE as a dict, or a binary segmentation mask " + " in a 2D numpy array of shape HxW.".format(type(segm)) + ) + + # Pad image and segmentation label here! 
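+ # The HWC uint8 image becomes a CHW tensor and each binary mask a torch tensor;
+ # the actual padding up to SIZE_DIVISIBILITY (value 128 for the image, 0 for the
+ # masks) is applied further below, after the panoptic branch.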
+ image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + masks = [torch.from_numpy(np.ascontiguousarray(x)) for x in masks] + + classes = [int(obj["category_id"]) for obj in annos] + classes = torch.tensor(classes, dtype=torch.int64) + elif "pan_seg_file_name" in dataset_dict: + pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") + segments_info = dataset_dict["segments_info"] + + # apply the same transformation to panoptic segmentation + + pan_seg_gt = transforms.apply_segmentation(pan_seg_gt) + + from panopticapi.utils import rgb2id + + pan_seg_gt = rgb2id(pan_seg_gt) + + instances = Instances(image_shape) + classes = [] + masks = [] + for segment_info in segments_info: + class_id = segment_info["category_id"] + if not segment_info["iscrowd"]: + classes.append(class_id) + masks.append(pan_seg_gt == segment_info["id"]) + + classes = np.array(classes) + classes = torch.tensor(classes, dtype=torch.int64) + masks = [torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks] + image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + + + + if self.size_divisibility > 0: + image_size = (image.shape[-2], image.shape[-1]) + padding_size = [ + 0, + self.size_divisibility - image_size[1], + 0, + self.size_divisibility - image_size[0], + ] + # pad image + image = F.pad(image, padding_size, value=128).contiguous() + # pad mask + masks = [F.pad(x, padding_size, value=0).contiguous() for x in masks] + + image_shape = (image.shape[-2], image.shape[-1]) # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = image + + # Prepare per-category binary masks + instances = Instances(image_shape) + instances.gt_classes = classes + + #boxes = np.zeros((0, 4)) + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, image.shape[-2], image.shape[-1])) + else: + masks = BitMasks(torch.stack(masks)) + instances.gt_masks = masks #.tensor + + #instances.gt_boxes = Boxes(boxes) + dataset_dict["instances"] = instances + + return dataset_dict + diff --git a/mask_adapter/data/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py b/mask_adapter/data/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..2a7fc6f86a3f371f4f32bb35b2189b5837eb9f78 --- /dev/null +++ b/mask_adapter/data/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py @@ -0,0 +1,194 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. 
+ +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/coco_instance_new_baseline_dataset_mapper.py +""" + +import copy +import logging + +import numpy as np +import torch + +from detectron2.config import configurable +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.data.transforms import TransformGen +from detectron2.structures import BitMasks, Instances + +from pycocotools import mask as coco_mask + +__all__ = ["COCOInstanceNewBaselineDatasetMapper"] + + +def convert_coco_poly_to_mask(segmentations, height, width): + masks = [] + for polygons in segmentations: + rles = coco_mask.frPyObjects(polygons, height, width) + mask = coco_mask.decode(rles) + if len(mask.shape) < 3: + mask = mask[..., None] + mask = torch.as_tensor(mask, dtype=torch.uint8) + mask = mask.any(dim=2) + masks.append(mask) + if masks: + masks = torch.stack(masks, dim=0) + else: + masks = torch.zeros((0, height, width), dtype=torch.uint8) + return masks + + +def build_transform_gen(cfg, is_train): + """ + Create a list of default :class:`Augmentation` from config. + Now it includes resizing and flipping. + Returns: + list[Augmentation] + """ + assert is_train, "Only support training augmentation" + image_size = cfg.INPUT.IMAGE_SIZE + min_scale = cfg.INPUT.MIN_SCALE + max_scale = cfg.INPUT.MAX_SCALE + + augmentation = [] + + if cfg.INPUT.RANDOM_FLIP != "none": + augmentation.append( + T.RandomFlip( + horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", + vertical=cfg.INPUT.RANDOM_FLIP == "vertical", + ) + ) + + augmentation.extend([ + T.ResizeScale( + min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size + ), + T.FixedSizeCrop(crop_size=(image_size, image_size)), + ]) + + return augmentation + + +# This is specifically designed for the COCO dataset. +class COCOInstanceNewBaselineDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by MaskFormer. + + This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + tfm_gens, + image_format, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + tfm_gens: data augmentation + image_format: an image format supported by :func:`detection_utils.read_image`. + """ + self.tfm_gens = tfm_gens + logging.getLogger(__name__).info( + "[COCOInstanceNewBaselineDatasetMapper] Full TransformGens used in training: {}".format(str(self.tfm_gens)) + ) + + self.img_format = image_format + self.is_train = is_train + + @classmethod + def from_config(cls, cfg, is_train=True): + # Build augmentation + tfm_gens = build_transform_gen(cfg, is_train) + + ret = { + "is_train": is_train, + "tfm_gens": tfm_gens, + "image_format": cfg.INPUT.FORMAT, + } + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 
+ + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + # TODO: get padding mask + # by feeding a "segmentation mask" to the same transforms + padding_mask = np.ones(image.shape[:2]) + + image, transforms = T.apply_transform_gens(self.tfm_gens, image) + # the crop transformation has default padding value 0 for segmentation + padding_mask = transforms.apply_segmentation(padding_mask) + padding_mask = ~ padding_mask.astype(bool) + + image_shape = image.shape[:2] # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + dataset_dict["padding_mask"] = torch.as_tensor(np.ascontiguousarray(padding_mask)) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. + dataset_dict.pop("annotations", None) + return dataset_dict + + if "annotations" in dataset_dict: + # USER: Modify this if you want to keep them for some reason. + for anno in dataset_dict["annotations"]: + # Let's always keep mask + # if not self.mask_on: + # anno.pop("segmentation", None) + anno.pop("keypoints", None) + + # USER: Implement additional transformations if you have other types of data + annos = [ + utils.transform_instance_annotations(obj, transforms, image_shape) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + # NOTE: does not support BitMask due to augmentation + # Current BitMask cannot handle empty objects + instances = utils.annotations_to_instances(annos, image_shape) + # After transforms such as cropping are applied, the bounding box may no longer + # tightly bound the object. As an example, imagine a triangle object + # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight + # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to + # the intersection of original bounding box and the cropping box. + instances.gt_boxes = instances.gt_masks.get_bounding_boxes() + # Need to filter empty instances first (due to augmentation) + instances = utils.filter_empty_instances(instances) + # Generate masks from polygon + h, w = instances.image_size + # image_size_xyxy = torch.as_tensor([w, h, w, h], dtype=torch.float) + if hasattr(instances, 'gt_masks'): + gt_masks = instances.gt_masks + gt_masks = convert_coco_poly_to_mask(gt_masks.polygons, h, w) + instances.gt_masks = gt_masks + dataset_dict["instances"] = instances + + return dataset_dict diff --git a/mask_adapter/data/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py b/mask_adapter/data/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..89e616a60434a1d66edd10e73f3c1d53438c6d8d --- /dev/null +++ b/mask_adapter/data/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py @@ -0,0 +1,170 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. 
+ +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/coco_panoptic_new_baseline_dataset_mapper.py +""" + +import copy +import logging + +import numpy as np +import torch + +from detectron2.config import configurable +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.data.transforms import TransformGen +from detectron2.structures import BitMasks, Boxes, Instances + +__all__ = ["COCOPanopticNewBaselineDatasetMapper"] + + +def build_transform_gen(cfg, is_train): + """ + Create a list of default :class:`Augmentation` from config. + Now it includes resizing and flipping. + Returns: + list[Augmentation] + """ + assert is_train, "Only support training augmentation" + image_size = cfg.INPUT.IMAGE_SIZE + min_scale = cfg.INPUT.MIN_SCALE + max_scale = cfg.INPUT.MAX_SCALE + + augmentation = [] + + if cfg.INPUT.RANDOM_FLIP != "none": + augmentation.append( + T.RandomFlip( + horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", + vertical=cfg.INPUT.RANDOM_FLIP == "vertical", + ) + ) + + augmentation.extend([ + T.ResizeScale( + min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size + ), + T.FixedSizeCrop(crop_size=(image_size, image_size)), + ]) + + return augmentation + + +# This is specifically designed for the COCO dataset. +class COCOPanopticNewBaselineDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by MaskFormer. + + This dataset mapper applies the same transformation as DETR for COCO panoptic segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + tfm_gens, + image_format, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + crop_gen: crop augmentation + tfm_gens: data augmentation + image_format: an image format supported by :func:`detection_utils.read_image`. + """ + self.tfm_gens = tfm_gens + logging.getLogger(__name__).info( + "[COCOPanopticNewBaselineDatasetMapper] Full TransformGens used in training: {}".format( + str(self.tfm_gens) + ) + ) + + self.img_format = image_format + self.is_train = is_train + + @classmethod + def from_config(cls, cfg, is_train=True): + # Build augmentation + tfm_gens = build_transform_gen(cfg, is_train) + + ret = { + "is_train": is_train, + "tfm_gens": tfm_gens, + "image_format": cfg.INPUT.FORMAT, + } + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + image, transforms = T.apply_transform_gens(self.tfm_gens, image) + image_shape = image.shape[:2] # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. 
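+ # Store the transformed image; during training the panoptic PNG is then decoded
+ # with panopticapi's rgb2id and turned into per-segment binary masks and classes
+ # (crowd segments are skipped), with gt_boxes derived from the masks.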
+ dataset_dict["image"] = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + + if not self.is_train: + # USER: Modify this if you want to keep them for some reason. + dataset_dict.pop("annotations", None) + return dataset_dict + + if "pan_seg_file_name" in dataset_dict: + pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") + segments_info = dataset_dict["segments_info"] + + # apply the same transformation to panoptic segmentation + pan_seg_gt = transforms.apply_segmentation(pan_seg_gt) + + from panopticapi.utils import rgb2id + + pan_seg_gt = rgb2id(pan_seg_gt) + + instances = Instances(image_shape) + classes = [] + masks = [] + for segment_info in segments_info: + class_id = segment_info["category_id"] + if not segment_info["iscrowd"]: + classes.append(class_id) + masks.append(pan_seg_gt == segment_info["id"]) + + classes = np.array(classes) + instances.gt_classes = torch.tensor(classes, dtype=torch.int64) + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) + instances.gt_boxes = Boxes(torch.zeros((0, 4))) + else: + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) + ) + instances.gt_masks = masks.tensor + instances.gt_boxes = masks.get_bounding_boxes() + + dataset_dict["instances"] = instances + + return dataset_dict diff --git a/mask_adapter/data/dataset_mappers/mask_former_instance_dataset_mapper.py b/mask_adapter/data/dataset_mappers/mask_former_instance_dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..783cc86c1f87542b69e11db3464fd57afd89a086 --- /dev/null +++ b/mask_adapter/data/dataset_mappers/mask_former_instance_dataset_mapper.py @@ -0,0 +1,186 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/mask_former_instance_dataset_mapper.py +""" + +import copy +import logging + +import numpy as np +import pycocotools.mask as mask_util +import torch +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.projects.point_rend import ColorAugSSDTransform +from detectron2.structures import BitMasks, Instances, polygons_to_bitmask + +__all__ = ["MaskFormerInstanceDatasetMapper"] + + +class MaskFormerInstanceDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by MaskFormer for instance segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + augmentations, + image_format, + size_divisibility, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. 
+ size_divisibility: pad image size to be divisible by this value + """ + self.is_train = is_train + self.tfm_gens = augmentations + self.img_format = image_format + self.size_divisibility = size_divisibility + + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[{self.__class__.__name__}] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train=True): + # Build augmentation + augs = [ + T.ResizeShortestEdge( + cfg.INPUT.MIN_SIZE_TRAIN, + cfg.INPUT.MAX_SIZE_TRAIN, + cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING, + ) + ] + if cfg.INPUT.CROP.ENABLED: + augs.append( + T.RandomCrop( + cfg.INPUT.CROP.TYPE, + cfg.INPUT.CROP.SIZE, + ) + ) + if cfg.INPUT.COLOR_AUG_SSD: + augs.append(ColorAugSSDTransform(img_format=cfg.INPUT.FORMAT)) + augs.append(T.RandomFlip()) + + ret = { + "is_train": is_train, + "augmentations": augs, + "image_format": cfg.INPUT.FORMAT, + "size_divisibility": cfg.INPUT.SIZE_DIVISIBILITY, + } + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + assert self.is_train, "MaskFormerPanopticDatasetMapper should only be used for training!" + + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + aug_input = T.AugInput(image) + aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input) + image = aug_input.image + + # transform instnace masks + assert "annotations" in dataset_dict + for anno in dataset_dict["annotations"]: + anno.pop("keypoints", None) + + annos = [ + utils.transform_instance_annotations(obj, transforms, image.shape[:2]) + for obj in dataset_dict.pop("annotations") + if obj.get("iscrowd", 0) == 0 + ] + + if len(annos): + assert "segmentation" in annos[0] + segms = [obj["segmentation"] for obj in annos] + masks = [] + for segm in segms: + if isinstance(segm, list): + # polygon + masks.append(polygons_to_bitmask(segm, *image.shape[:2])) + elif isinstance(segm, dict): + # COCO RLE + masks.append(mask_util.decode(segm)) + elif isinstance(segm, np.ndarray): + assert segm.ndim == 2, "Expect segmentation of 2 dimensions, got {}.".format( + segm.ndim + ) + # mask array + masks.append(segm) + else: + raise ValueError( + "Cannot convert segmentation of type '{}' to BitMasks!" + "Supported types are: polygons as list[list[float] or ndarray]," + " COCO-style RLE as a dict, or a binary segmentation mask " + " in a 2D numpy array of shape HxW.".format(type(segm)) + ) + + # Pad image and segmentation label here! 
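+ # Image and per-instance masks are converted to torch tensors here; when
+ # SIZE_DIVISIBILITY is positive they are right/bottom-padded up to that size
+ # (image with value 128, masks with 0) before being packed into Instances.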
+ image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + masks = [torch.from_numpy(np.ascontiguousarray(x)) for x in masks] + + classes = [int(obj["category_id"]) for obj in annos] + classes = torch.tensor(classes, dtype=torch.int64) + + if self.size_divisibility > 0: + image_size = (image.shape[-2], image.shape[-1]) + padding_size = [ + 0, + self.size_divisibility - image_size[1], + 0, + self.size_divisibility - image_size[0], + ] + # pad image + image = F.pad(image, padding_size, value=128).contiguous() + # pad mask + masks = [F.pad(x, padding_size, value=0).contiguous() for x in masks] + + image_shape = (image.shape[-2], image.shape[-1]) # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = image + + # Prepare per-category binary masks + instances = Instances(image_shape) + instances.gt_classes = classes + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, image.shape[-2], image.shape[-1])) + else: + masks = BitMasks(torch.stack(masks)) + instances.gt_masks = masks.tensor + + dataset_dict["instances"] = instances + + return dataset_dict diff --git a/mask_adapter/data/dataset_mappers/mask_former_panoptic_dataset_mapper.py b/mask_adapter/data/dataset_mappers/mask_former_panoptic_dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..a3c618ee2ae78484cd7689df49dd481d5a19e94a --- /dev/null +++ b/mask_adapter/data/dataset_mappers/mask_former_panoptic_dataset_mapper.py @@ -0,0 +1,171 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/mask_former_panoptic_dataset_mapper.py +""" + +import copy +import logging + +import numpy as np +import torch +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.structures import BitMasks, Instances + +from .mask_former_semantic_dataset_mapper import MaskFormerSemanticDatasetMapper + +__all__ = ["MaskFormerPanopticDatasetMapper"] + + +class MaskFormerPanopticDatasetMapper(MaskFormerSemanticDatasetMapper): + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by MaskFormer for panoptic segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + augmentations, + image_format, + ignore_label, + size_divisibility, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. 
+ ignore_label: the label that is ignored to evaluation + size_divisibility: pad image size to be divisible by this value + """ + super().__init__( + is_train, + augmentations=augmentations, + image_format=image_format, + ignore_label=ignore_label, + size_divisibility=size_divisibility, + ) + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + assert self.is_train, "MaskFormerPanopticDatasetMapper should only be used for training!" + + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + # semantic segmentation + if "sem_seg_file_name" in dataset_dict: + # PyTorch transformation not implemented for uint16, so converting it to double first + sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name")).astype("double") + else: + sem_seg_gt = None + + # panoptic segmentation + if "pan_seg_file_name" in dataset_dict: + pan_seg_gt = utils.read_image(dataset_dict.pop("pan_seg_file_name"), "RGB") + segments_info = dataset_dict["segments_info"] + else: + pan_seg_gt = None + segments_info = None + + if pan_seg_gt is None: + raise ValueError( + "Cannot find 'pan_seg_file_name' for panoptic segmentation dataset {}.".format( + dataset_dict["file_name"] + ) + ) + + aug_input = T.AugInput(image, sem_seg=sem_seg_gt) + aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input) + image = aug_input.image + if sem_seg_gt is not None: + sem_seg_gt = aug_input.sem_seg + + # apply the same transformation to panoptic segmentation + pan_seg_gt = transforms.apply_segmentation(pan_seg_gt) + + from panopticapi.utils import rgb2id + + pan_seg_gt = rgb2id(pan_seg_gt) + + # Pad image and segmentation label here! + image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + if sem_seg_gt is not None: + sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) + pan_seg_gt = torch.as_tensor(pan_seg_gt.astype("long")) + + if self.size_divisibility > 0: + image_size = (image.shape[-2], image.shape[-1]) + padding_size = [ + 0, + self.size_divisibility - image_size[1], + 0, + self.size_divisibility - image_size[0], + ] + image = F.pad(image, padding_size, value=128).contiguous() + if sem_seg_gt is not None: + sem_seg_gt = F.pad(sem_seg_gt, padding_size, value=self.ignore_label).contiguous() + pan_seg_gt = F.pad( + pan_seg_gt, padding_size, value=0 + ).contiguous() # 0 is the VOID panoptic label + + image_shape = (image.shape[-2], image.shape[-1]) # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. 
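+ # Below, the (already padded) image and semantic GT are stored, instance-style
+ # 'annotations' are rejected, and the panoptic id map is split into gt_classes
+ # plus one binary gt_mask per non-crowd segment.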
+ dataset_dict["image"] = image + if sem_seg_gt is not None: + dataset_dict["sem_seg"] = sem_seg_gt.long() + + if "annotations" in dataset_dict: + raise ValueError("Pemantic segmentation dataset should not have 'annotations'.") + + # Prepare per-category binary masks + pan_seg_gt = pan_seg_gt.numpy() + instances = Instances(image_shape) + classes = [] + masks = [] + for segment_info in segments_info: + class_id = segment_info["category_id"] + if not segment_info["iscrowd"]: + classes.append(class_id) + masks.append(pan_seg_gt == segment_info["id"]) + + classes = np.array(classes) + instances.gt_classes = torch.tensor(classes, dtype=torch.int64) + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, pan_seg_gt.shape[-2], pan_seg_gt.shape[-1])) + else: + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) + ) + instances.gt_masks = masks.tensor + + dataset_dict["instances"] = instances + + return dataset_dict diff --git a/mask_adapter/data/dataset_mappers/mask_former_semantic_dataset_mapper.py b/mask_adapter/data/dataset_mappers/mask_former_semantic_dataset_mapper.py new file mode 100644 index 0000000000000000000000000000000000000000..c16e1f3e68c9522f51df6fb1ca3663bdf0242df7 --- /dev/null +++ b/mask_adapter/data/dataset_mappers/mask_former_semantic_dataset_mapper.py @@ -0,0 +1,215 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/dataset_mappers/mask_former_semantic_dataset_mapper.py +""" + +import copy +import logging + +import numpy as np +import torch +from torch.nn import functional as F + +from detectron2.config import configurable +from detectron2.data import MetadataCatalog +from detectron2.data import detection_utils as utils +from detectron2.data import transforms as T +from detectron2.projects.point_rend import ColorAugSSDTransform +from detectron2.structures import BitMasks, Instances + +__all__ = ["MaskFormerSemanticDatasetMapper"] + + +class MaskFormerSemanticDatasetMapper: + """ + A callable which takes a dataset dict in Detectron2 Dataset format, + and map it into a format used by MaskFormer for semantic segmentation. + + The callable currently does the following: + + 1. Read the image from "file_name" + 2. Applies geometric transforms to the image and annotation + 3. Find and applies suitable cropping to the image and annotation + 4. Prepare image and annotation to Tensors + """ + + @configurable + def __init__( + self, + is_train=True, + *, + augmentations, + image_format, + ignore_label, + size_divisibility, + ): + """ + NOTE: this interface is experimental. + Args: + is_train: for training or inference + augmentations: a list of augmentations or deterministic transforms to apply + image_format: an image format supported by :func:`detection_utils.read_image`. 
+ ignore_label: the label that is ignored to evaluation + size_divisibility: pad image size to be divisible by this value + """ + self.is_train = is_train + self.tfm_gens = augmentations + self.img_format = image_format + self.ignore_label = ignore_label + self.size_divisibility = size_divisibility + + logger = logging.getLogger(__name__) + mode = "training" if is_train else "inference" + logger.info(f"[{self.__class__.__name__}] Augmentations used in {mode}: {augmentations}") + + @classmethod + def from_config(cls, cfg, is_train=True): + # Build augmentation + # augs = [ + # T.ResizeShortestEdge( + # cfg.INPUT.MIN_SIZE_TRAIN, + # cfg.INPUT.MAX_SIZE_TRAIN, + # cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING, + # ) + # ] + # if cfg.INPUT.CROP.ENABLED: + # augs.append( + # T.RandomCrop_CategoryAreaConstraint( + # cfg.INPUT.CROP.TYPE, + # cfg.INPUT.CROP.SIZE, + # cfg.INPUT.CROP.SINGLE_CATEGORY_MAX_AREA, + # cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, + # ) + # ) + # if cfg.INPUT.COLOR_AUG_SSD: + # augs.append(ColorAugSSDTransform(img_format=cfg.INPUT.FORMAT)) + # augs.append(T.RandomFlip()) + + image_size = cfg.INPUT.IMAGE_SIZE + min_scale = cfg.INPUT.MIN_SCALE + max_scale = cfg.INPUT.MAX_SCALE + + augmentation = [] + + if cfg.INPUT.RANDOM_FLIP != "none": + augmentation.append( + T.RandomFlip( + horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", + vertical=cfg.INPUT.RANDOM_FLIP == "vertical", + ) + ) + + augmentation.extend([ + T.ResizeScale( + min_scale=min_scale, max_scale=max_scale, target_height=image_size, target_width=image_size + ), + T.FixedSizeCrop(crop_size=(image_size, image_size)), + ]) + + # Assume always applies to the training set. + dataset_names = cfg.DATASETS.TRAIN + meta = MetadataCatalog.get(dataset_names[0]) + ignore_label = meta.ignore_label + + ret = { + "is_train": is_train, + "augmentations": augmentation, + "image_format": cfg.INPUT.FORMAT, + "ignore_label": ignore_label, + "size_divisibility": cfg.INPUT.SIZE_DIVISIBILITY, + } + return ret + + def __call__(self, dataset_dict): + """ + Args: + dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. + + Returns: + dict: a format that builtin models in detectron2 accept + """ + assert self.is_train, "MaskFormerSemanticDatasetMapper should only be used for training!" + + dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below + image = utils.read_image(dataset_dict["file_name"], format=self.img_format) + utils.check_image_size(dataset_dict, image) + + file_name = dataset_dict["file_name"] + if "stuff" in file_name: + dataset_dict["dataname"] = "openvocab_coco_2017_train_stuff_sem_seg" + + if "sem_seg_file_name" in dataset_dict: + # PyTorch transformation not implemented for uint16, so converting it to double first + sem_seg_gt = utils.read_image(dataset_dict.pop("sem_seg_file_name")).astype("double") + else: + sem_seg_gt = None + + if sem_seg_gt is None: + raise ValueError( + "Cannot find 'sem_seg_file_name' for semantic segmentation dataset {}.".format( + dataset_dict["file_name"] + ) + ) + + aug_input = T.AugInput(image, sem_seg=sem_seg_gt) + aug_input, transforms = T.apply_transform_gens(self.tfm_gens, aug_input) + image = aug_input.image + sem_seg_gt = aug_input.sem_seg + + # Pad image and segmentation label here! 
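+ # The image becomes a CHW tensor and the semantic GT a long tensor; padding up to
+ # SIZE_DIVISIBILITY uses value 128 for the image and ignore_label for the GT, and
+ # the per-class binary masks below are built from the unique labels, with
+ # ignore_label excluded.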
+ image = torch.as_tensor(np.ascontiguousarray(image.transpose(2, 0, 1))) + if sem_seg_gt is not None: + sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) + + if self.size_divisibility > 0: + image_size = (image.shape[-2], image.shape[-1]) + padding_size = [ + 0, + self.size_divisibility - image_size[1], + 0, + self.size_divisibility - image_size[0], + ] + image = F.pad(image, padding_size, value=128).contiguous() + if sem_seg_gt is not None: + sem_seg_gt = F.pad(sem_seg_gt, padding_size, value=self.ignore_label).contiguous() + + image_shape = (image.shape[-2], image.shape[-1]) # h, w + + # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, + # but not efficient on large generic data structures due to the use of pickle & mp.Queue. + # Therefore it's important to use torch.Tensor. + dataset_dict["image"] = image + + if sem_seg_gt is not None: + dataset_dict["sem_seg"] = sem_seg_gt.long() + + if "annotations" in dataset_dict: + raise ValueError("Semantic segmentation dataset should not have 'annotations'.") + + # Prepare per-category binary masks + if sem_seg_gt is not None: + sem_seg_gt = sem_seg_gt.numpy() + instances = Instances(image_shape) + classes = np.unique(sem_seg_gt) + # remove ignored region + classes = classes[classes != self.ignore_label] + instances.gt_classes = torch.tensor(classes, dtype=torch.int64) + + masks = [] + for class_id in classes: + masks.append(sem_seg_gt == class_id) + + if len(masks) == 0: + # Some image does not have annotation (all ignored) + instances.gt_masks = torch.zeros((0, sem_seg_gt.shape[-2], sem_seg_gt.shape[-1])) + else: + masks = BitMasks( + torch.stack([torch.from_numpy(np.ascontiguousarray(x.copy())) for x in masks]) + ) + instances.gt_masks = masks.tensor + + dataset_dict["instances"] = instances + + return dataset_dict diff --git a/mask_adapter/data/datasets/__init__.py b/mask_adapter/data/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f14dc2bbe3f53c9c50a7ee44d8bf7d324d3cd2d4 --- /dev/null +++ b/mask_adapter/data/datasets/__init__.py @@ -0,0 +1,35 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from . 
import ( + register_coco_panoptic_annos_semseg, + register_ade20k_panoptic, + register_cityscapes_panoptic, + register_mapillary_vistas_panoptic, + register_ade20k_full, + register_pascal_voc_20_semantic, + register_pascal_voc_21_semantic, + register_pascal_ctx_59_sem_seg, + register_pascal_ctx_459_sem_seg, + register_coco_instance, + register_ade20k_instance, + register_coco_stuff_164k, + #register_all_grand, + openseg_classes +) + +#from .register_grand_data import register_all_grand +# from .register_objects365 import register_all_obj365v1 \ No newline at end of file diff --git a/mask_adapter/data/datasets/__pycache__/__init__.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94a5f0c47c0535f01ba8f016d3abc7c9b8b8af29 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/__init__.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ffac200bf3f821b33e4d5b0854880a4a2cdf1599 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/openseg_classes.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/openseg_classes.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8ddb552db7b3eac4ac6f0d3037d3271bd4a12495 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/openseg_classes.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/openseg_classes.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/openseg_classes.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7007fd1f10f17e5bfed83cf1a138e22f43707a0c Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/openseg_classes.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_ade20k_full.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_ade20k_full.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bd1aec844dd1fe9d4a7153531c9f1d089a14aced Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_ade20k_full.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_ade20k_full.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_ade20k_full.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c1579db2e742ada067999124f32cc483d1233b07 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_ade20k_full.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_ade20k_instance.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_ade20k_instance.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c2a478866cc396a93e04866f3a38e45f3bca45ad Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_ade20k_instance.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_ade20k_instance.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_ade20k_instance.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6dc80f3eb5b38d5736ae3acf60574128e7a26b7b Binary files /dev/null and 
b/mask_adapter/data/datasets/__pycache__/register_ade20k_instance.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_ade20k_panoptic.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_ade20k_panoptic.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d8077364b50cbe4e5f7345da63c96dc1012deab4 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_ade20k_panoptic.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_ade20k_panoptic.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_ade20k_panoptic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..58673df0b561ec33a30751494a4f09d8b25f89c8 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_ade20k_panoptic.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_cityscapes_panoptic.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_cityscapes_panoptic.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8668d2e358c3edd3a8e36d89f8597a2414f0e4e8 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_cityscapes_panoptic.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_cityscapes_panoptic.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_cityscapes_panoptic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a95f6a4812bfb4aada31a43fd53926e376f19f4 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_cityscapes_panoptic.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_coco_instance.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_coco_instance.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff1e9d4994a9330d63974899731758000be55694 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_coco_instance.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_coco_instance.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_coco_instance.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7b64e0cc3d8323139df05bd771dfc83386838e5e Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_coco_instance.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_coco_panoptic_annos_semseg.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_coco_panoptic_annos_semseg.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..844daa84e62659558cf04fbaf2073ec04713bb5b Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_coco_panoptic_annos_semseg.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_coco_panoptic_annos_semseg.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_coco_panoptic_annos_semseg.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b81eee86727769719fb1eafa42c58d8b02f4a400 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_coco_panoptic_annos_semseg.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_coco_stuff_164k.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_coco_stuff_164k.cpython-310.pyc new file mode 
100644 index 0000000000000000000000000000000000000000..9fadf4bc78a8e95843135bfa9d1c642eb0c69446 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_coco_stuff_164k.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_coco_stuff_164k.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_coco_stuff_164k.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7aef74e4819ec3e8e1a2f25962957af2b0a4ae47 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_coco_stuff_164k.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_grand_data.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_grand_data.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7697404d2867ab899fa2d1ced0f6e2c57c824e21 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_grand_data.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_mapillary_vistas_panoptic.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_mapillary_vistas_panoptic.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..320004295f5db22d597c24f66925f67b431ded57 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_mapillary_vistas_panoptic.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_mapillary_vistas_panoptic.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_mapillary_vistas_panoptic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fd8120f4b317a43cd6fe8af07d44658246bfe09 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_mapillary_vistas_panoptic.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_objects365.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_objects365.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b0b3850259c5af6344e7ebbc365c6ca10f25d57 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_objects365.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_objects365.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_objects365.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cf7318f8c4a55b1670ebe58b2a5d5106c357e486 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_objects365.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_459_sem_seg.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_459_sem_seg.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1efc99f2fb5ee4b15022185d22c93e6588c64416 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_459_sem_seg.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_459_sem_seg.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_459_sem_seg.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1656d3617cf6767c95bf7a4aff7a588e0c6849c6 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_459_sem_seg.cpython-38.pyc differ diff --git 
a/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_59_sem_seg.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_59_sem_seg.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..89154682e470bb3f1718dcc1e33a3dbd1612dc11 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_59_sem_seg.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_59_sem_seg.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_59_sem_seg.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b8e6ef8890da03a2376174e9d7537da846fa93f0 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_ctx_59_sem_seg.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_voc_20_semantic.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_20_semantic.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..29ce945a2b27bb16fe34cd67548996f94402d21b Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_20_semantic.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_voc_20_semantic.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_20_semantic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8e8d0552042ee1b1cf3884a4d5ea2a92ab6d9ea1 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_20_semantic.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_voc_21_semantic.cpython-310.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_21_semantic.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..966c9ed6043a6a5414809f46b7eceda554f43504 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_21_semantic.cpython-310.pyc differ diff --git a/mask_adapter/data/datasets/__pycache__/register_pascal_voc_21_semantic.cpython-38.pyc b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_21_semantic.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fc3e96834bde4721a8a96b506c73e42dc564c5c3 Binary files /dev/null and b/mask_adapter/data/datasets/__pycache__/register_pascal_voc_21_semantic.cpython-38.pyc differ diff --git a/mask_adapter/data/datasets/ade20k_150_with_prompt_eng.txt b/mask_adapter/data/datasets/ade20k_150_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..5226edfa39dd317be992f30c661185aa030c8180 --- /dev/null +++ b/mask_adapter/data/datasets/ade20k_150_with_prompt_eng.txt @@ -0,0 +1,151 @@ +0:invalid_class_id +1:wall,walls,brick wall,stone wall,interior wall +2:building,buildings,edifice,edifices +3:sky,clouds +4:floor,flooring +5:tree,trees +6:ceiling +7:road,route,street,roads,streets,routes +8:bed,beds +9:windowpane,window,windows +10:grass,grass field +11:cabinet,cabinets,wall mounted cabine +12:sidewalk,pavement +13:person,child,girl,boy,woman,man,people,children,girls,boys,women,men +14:earth,ground +15:door,double door,doors +16:table,tables,tablecloth +17:mountain,mount,mountains +18:plant,flora,plant life,plants,bushes +19:curtain,drape,drapery,mantle,pall +20:chair,chairs +21:car,automobile,cars +22:water +23:painting,picture,paintings,pictures,wallart,framed canvas 
+24:sofa,couch,sofas,couches +25:shelf,shelves +26:house exterior +27:sea,ocean +28:mirror,mirrors +29:rug,carpet,carpeting +30:field +31:armchair,armchairs +32:seat,seats +33:fence,fencing +34:desk,desks +35:rock,stone,rocks,stones +36:wardrobe,closet,press,wardrobes,closets +37:lamp,lamps +38:bathtub,bathing tub,bath,tub +39:railing,rail +40:cushion,cushions +41:pedestal +42:box,boxes +43:column,pillar +44:signboard,sign,signboards,signs +45:chest of drawers,chest,bureau,dresser +46:counter +47:sand +48:sink +49:skyscraper,skyscrapers +50:fireplace,hearth,open fireplace +51:refrigerator,icebox +52:grandstand,covered stand +53:path +54:stairs,steps +55:runway +56:case,display case,showcase,vitrine +57:pool table,billiard table,snooker table +58:pillow,pillows +59:screen door,shower door +60:stairway,staircase +61:river +62:bridge,span +63:bookcase +64:window screen,door screen +65:coffee table,cocktail table +66:toilet,commode,crapper,potty +67:flower,flowers +68:book,books +69:hill +70:bench,benches +71:countertop,counter top,worktop +72:stove,kitchen stove,kitchen range,kitchen range,cooking stove +73:palm tree,palm trees +74:kitchen island +75:computer,computing machine,computing device,data processor,electronic computer,information processing system +76:swivel chair +77:boat +78:bar +79:arcade machine,arcade machines +80:hovel,hut,hutch,shack,shanty +81:bus,autobus,double-decker,jitney,motorbus,motorcoach,omnibus,passenger vehicle +82:towel +83:light bulb,lightbulb,bulb,incandescent lamp,electric light,electric-light bulb +84:truck,motortruck +85:tower,towers +86:chandelier,pendant,pendent +87:awning,sunshade,sunblind +88:streetlight,street lamp +89:booth,cubicle,stall,kiosk +90:television receiver,television,television set,tv,tv set +91:airplane,aeroplane,airplanes,aeroplanes +92:dirt track +93:apparel,wearing apparel,dress,clothes +94:pole +95:land,soil +96:bannister,banister,balustrade,balusters,handrail +97:escalator,moving staircase,moving stairway +98:ottoman,pouf,pouffe,puff,hassock +99:bottle,bottles,water bottle +100:buffet,sideboard +101:poster,posting,placard,notice,bill,card +102:stage +103:van +104:ship +105:fountain +106:conveyer belt,conveyor belt,conveyer,conveyor,transporter +107:canopy +108:washer,automatic washer,washing machine +109:plaything,toy,toys +110:swimming pool,swimming bath +111:stool,stools +112:barrel,cask,barrels,casks +113:basket,handbasket +114:waterfall,falls +115:tent,collapsible shelter +116:bag,bags,gift bag,paper bag +117:minibike,motorbike +118:cradle +119:oven +120:ball,balls +121:food,solid food +122:step,stair +123:tank,storage tank +124:trade name,brand name,brand,marque +125:microwave,microwave oven +126:plant pots,plant pot,flower pot,flowerpot,planter +127:animal,animate being,dog,cat,horse,cow,sheep,zebra,girraffe,bird +128:bicycle,bike +129:lake +130:dishwasher,dish washer,dishwashing machine +131:projection screen +132:blanket,cover +133:sculpture,sculptures +134:exhaust hood +135:sconce,sconce lamp,sconce light +136:vase,vases +137:traffic light,traffic signal,traffic lights +138:tray,trays +139:ashcan,trash can,garbage can,wastebin,ash bin,ash-bin,ashbin,dustbin,trash barrel,trash bin +140:ceiling fan,floor fan +141:pier,wharf,wharfage,dock +142:crt screen +143:plate,plates +144:monitor,monitoring device,monitors +145:bulletin board,notice board +146:shower +147:radiator +148:cup,cups,drinking glass,drinking glasses +149:clock +150:flag,flags \ No newline at end of file diff --git 
a/mask_adapter/data/datasets/ade20k_847_with_prompt_eng.txt b/mask_adapter/data/datasets/ade20k_847_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..b7631743521190368745270b3fd73d418040d12a --- /dev/null +++ b/mask_adapter/data/datasets/ade20k_847_with_prompt_eng.txt @@ -0,0 +1,848 @@ +0:invalid_class_id +1:wall,walls,interior wall,brick wall,stone wall +2:building,buildings,edifice,edifices +3:sky,clouds +4:tree,trees +5:road,route,street,roads,streets,routes +6:floor,flooring +7:ceiling +8:bed,beds +9:sidewalk,pavement +10:earth,ground +11:cabinet,cabinets,wall mounted cabine +12:person,child,girl,boy,woman,man,people,children,girls,boys,women,men +13:grass,grass field +14:windowpane,window,windows +15:car,automobile,cars +16:mountain,mount,mountains +17:plant,flora,plant life,plants,bushes +18:table,tables,tablecloth +19:chair,chairs +20:curtain,drape,drapery,mantle,pall +21:door,double door,doors +22:sofa,couch,sofas,couches +23:sea,ocean +24:painting,picture,paintings,pictures,wallart,framed canvas +25:water +26:mirror,mirrors +27:house exterior +28:rug,carpet,carpeting +29:shelf,shelves +30:armchair,armchairs +31:fence,fencing +32:field +33:lamp,lamps +34:rock,stone,rocks,stones +35:seat,seats +36:river +37:desk,desks +38:bathtub,bathing tub,bath,tub +39:railing,rail +40:signboard,sign,signboards,signs +41:cushion,cushions +42:path +43:work surface +44:stairs,steps +45:column,pillar +46:sink +47:wardrobe,closet,press,wardrobes,closets +48:snow +49:refrigerator,icebox +50:pedestal +51:bridge,span +52:blind +53:runway +54:cliff,drop,drop-off +55:sand +56:fireplace,hearth,open fireplace +57:pillow,pillows +58:screen door,shower door +59:toilet,commode,crapper,potty +60:skyscraper,skyscrapers +61:grandstand,covered stand +62:box,boxes +63:pool table,billiard table,snooker table +64:palm tree,palm trees +65:double door +66:coffee table,cocktail table +67:counter +68:countertop,counter top,worktop +69:chest of drawers,chest,bureau,dresser +70:kitchen island +71:boat +72:waterfall,falls +73:stove,kitchen stove,kitchen range,kitchen range,cooking stove +74:flower,flowers +75:bookcase +76:controls +77:book,books +78:stairway,staircase +79:streetlight,street lamp +80:computer,computing machine,computing device,data processor,electronic computer,information processing system +81:bus,autobus,double-decker,jitney,motorbus,motorcoach,omnibus,passenger vehicle +82:swivel chair +83:light,light source +84:bench,benches +85:case,display case,showcase,vitrine +86:towel +87:fountain +88:embankment +89:television receiver,television,television set,tv,tv set +90:van +91:hill +92:awning,sunshade,sunblind +93:poster,posting,placard,notice,bill,card +94:truck,motortruck +95:airplane,aeroplane,airplanes,aeroplanes +96:pole +97:tower,towers +98:court +99:ball,balls +100:aircraft carrier,carrier,flattop,attack aircraft carrier +101:buffet,sideboard +102:hovel,hut,hutch,shack,shanty +103:apparel,wearing apparel,dress,clothes +104:minibike,motorbike +105:animal,animate being,dog,cat,horse,cow,sheep,zebra,giraffe,bird +106:chandelier,pendant,pendent +107:step,stair +108:booth,cubicle,stall,kiosk +109:bicycle,bike +110:doorframe,doorcase +111:sconce,sconce lamp,sconce light +112:pond +113:trade name,brand name +114:bannister,banister,balustrade,balusters,handrail +115:bag,bags,gift bag,paper bag +116:traffic light,traffic signal,traffic lights +117:gazebo +118:escalator,moving staircase,moving stairway +119:land,soil +120:board,plank +121:arcade machine,arcade machines 
+122:eiderdown,duvet,continental quilt +123:bar +124:stall,stand,sales booth +125:playground +126:ship +127:ottoman,pouf,pouffe,puff,hassock +128:ashcan,trash can,garbage can,wastebin,ash bin,ash-bin,ashbin,dustbin,trash barrel,trash bin +129:bottle,bottles,water bottle +130:cradle +131:pot,flowerpot +132:conveyer belt,conveyor belt,conveyer,conveyor,transporter +133:train,railroad train +134:stool,stools +135:lake +136:tank,storage tank +137:ice,water ice +138:basket,handbasket +139:manhole +140:tent,collapsible shelter +141:canopy +142:microwave,microwave oven +143:barrel,cask,barrels,casks +144:dirt track +145:beam +146:dishwasher,dish washer,dishwashing machine +147:plate,plates +148:crt screen +149:ruins +150:washer,automatic washer,washing machine +151:blanket,cover +152:plaything,toy,toys +153:food,solid food +154:projection screen +155:oven +156:stage +157:beacon,lighthouse,beacon light,pharos +158:umbrella +159:sculpture,sculptures +160:aqueduct +161:container +162:scaffolding,staging +163:exhaust hood +164:curb,curbing,kerb +165:roller coaster +166:horse,equus caballus +167:catwalk +168:glass,drinking glass +169:vase,vases +170:central reservation +171:carousel +172:radiator +173:closet +174:machine +175:pier,wharf,wharfage,dock +176:ceiling fan,floor fan +177:inflatable bounce game +178:pitch +179:paper +180:arcade,colonnade +181:hot tub +182:helicopter +183:tray,trays +184:partition,divider +185:vineyard +186:bowl +187:bullring +188:flag,flags +189:pot +190:footbridge,overcrossing,pedestrian bridge +191:shower +192:bag,traveling bag,travelling bag,grip,suitcase +193:bulletin board,notice board +194:confessional booth +195:trunk,tree trunk,bole +196:forest +197:elevator door +198:laptop,laptop computer +199:instrument panel +200:bucket,pail +201:tapestry,tapis +202:platform +203:jacket +204:gate +205:monitor,monitoring device,monitors +206:telephone booth,phone booth,call box,telephone box,telephone kiosk +207:spotlight,spot +208:ring +209:control panel +210:blackboard,chalkboard +211:air conditioner,air conditioning +212:chest +213:clock +214:sand dune +215:pipe,pipage,piping +216:vault +217:table football +218:cannon +219:swimming pool,swimming bath +220:fluorescent,fluorescent fixture +221:statue +222:loudspeaker,speaker,speaker unit,loudspeaker system,speaker system +223:exhibitor +224:ladder +225:carport +226:dam +227:pulpit +228:skylight,fanlight +229:water tower +230:grill,grille,grillwork +231:display board +232:pane,pane of glass,window glass +233:rubbish,trash,scrap +234:ice rink +235:fruit +236:patio +237:vending machine +238:telephone,phone,telephone set +239:net +240:backpack,back pack,knapsack,packsack,rucksack,haversack +241:jar +242:track +243:magazine +244:shutter +245:roof +246:banner,streamer +247:landfill +248:post +249:altarpiece,reredos +250:hat,chapeau,lid +251:arch,archway +252:table game +253:bag,handbag,pocketbook,purse +254:document,written document,papers +255:dome +256:pier +257:shanties +258:forecourt +259:crane +260:dog,domestic dog,canis familiaris +261:piano,pianoforte,forte-piano +262:drawing +263:cabin +264:ad,advertisement,advertizement,advertising,advertizing,advert +265:amphitheater,amphitheatre,coliseum +266:monument +267:henhouse +268:cockpit +269:heater,warmer +270:windmill,aerogenerator,wind generator +271:pool +272:elevator,lift +273:decoration,ornament,ornamentation +274:labyrinth +275:text,textual matter +276:printer +277:mezzanine,first balcony +278:mattress +279:straw +280:stalls +281:patio,terrace +282:billboard,hoarding +283:bus 
stop +284:trouser,pant +285:console table,console +286:rack +287:notebook +288:shrine +289:pantry +290:cart +291:steam shovel +292:porch +293:postbox,mailbox,letter box +294:figurine,statuette +295:recycling bin +296:folding screen +297:telescope +298:deck chair,beach chair +299:kennel +300:coffee maker +301:altar,communion table,lord's table +302:fish +303:easel +304:artificial golf green +305:iceberg +306:candlestick,candle holder +307:shower stall,shower bath +308:television stand +309:wall socket,wall plug,electric outlet,electrical outlet,outlet,electric receptacle +310:skeleton +311:grand piano,grand +312:candy,confect +313:grille door +314:pedestal,plinth,footstall +315:jersey,t-shirt,tee shirt +316:shoe +317:gravestone,headstone,tombstone +318:shanty +319:structure +320:rocking chair,rocker +321:bird +322:place mat +323:tomb +324:big top +325:gas pump,gasoline pump,petrol pump,island dispenser +326:lockers +327:cage +328:finger +329:bleachers +330:ferris wheel +331:hairdresser chair +332:mat +333:stands +334:aquarium,fish tank,marine museum +335:streetcar,tram,tramcar,trolley,trolley car +336:napkin,table napkin,serviette +337:dummy +338:booklet,brochure,folder,leaflet,pamphlet +339:sand trap +340:shop,store +341:table cloth +342:service station +343:coffin +344:drawer +345:cages +346:slot machine,coin machine +347:balcony +348:volleyball court +349:table tennis +350:control table +351:shirt +352:merchandise,ware,product +353:railway +354:parterre +355:chimney +356:can,tin,tin can +357:tanks +358:fabric,cloth,material,textile +359:alga,algae +360:system +361:map +362:greenhouse +363:mug +364:barbecue +365:trailer +366:toilet tissue,toilet paper,bathroom tissue +367:organ +368:dishrag,dishcloth +369:island +370:keyboard +371:trench +372:basket,basketball hoop,hoop +373:steering wheel,wheel +374:pitcher,ewer +375:goal +376:bread,breadstuff,staff of life +377:beds +378:wood +379:file cabinet +380:newspaper,paper +381:motorboat +382:rope +383:guitar +384:rubble +385:scarf +386:barrels +387:cap +388:leaves +389:control tower +390:dashboard +391:bandstand +392:lectern +393:switch,electric switch,electrical switch +394:baseboard,mopboard,skirting board +395:shower room +396:smoke +397:faucet,spigot +398:bulldozer +399:saucepan +400:shops +401:meter +402:crevasse +403:gear +404:candelabrum,candelabra +405:sofa bed +406:tunnel +407:pallet +408:wire,conducting wire +409:kettle,boiler +410:bidet +411:baby buggy,baby carriage,carriage,perambulator,pram,stroller,go-cart,pushchair,pusher +412:music stand +413:pipe,tube +414:cup,cups,drinking glass,drinking glasses +415:parking meter +416:ice hockey rink +417:shelter +418:weeds +419:temple +420:patty,cake +421:ski slope +422:panel +423:wallet +424:wheel +425:towel rack,towel horse +426:roundabout +427:canister,cannister,tin +428:rod +429:soap dispenser +430:bell +431:canvas +432:box office,ticket office,ticket booth +433:teacup +434:trellis +435:workbench +436:valley,vale +437:toaster +438:knife +439:podium +440:ramp +441:tumble dryer +442:fireplug,fire hydrant,plug +443:gym shoe,sneaker,tennis shoe +444:lab bench +445:equipment +446:rocky formation +447:plastic +448:calendar +449:caravan +450:check-in-desk +451:ticket counter +452:brush +453:mill +454:covered bridge +455:bowling alley +456:hanger +457:excavator +458:trestle +459:revolving door +460:blast furnace +461:scale,weighing machine +462:projector +463:soap +464:locker +465:tractor +466:stretcher +467:frame +468:grating +469:alembic +470:candle,taper,wax light +471:barrier +472:cardboard 
+473:cave +474:puddle +475:tarp +476:price tag +477:watchtower +478:meters +479:light bulb,bulb,bulbs +480:tracks +481:hair dryer +482:skirt +483:viaduct +484:paper towel +485:coat +486:sheet +487:fire extinguisher,extinguisher,asphyxiator +488:water wheel +489:pottery,clayware +490:magazine rack +491:teapot +492:microphone,mike +493:support +494:forklift +495:canyon +496:cash register,register +497:leaf,leafage,foliage +498:remote control,remote +499:soap dish +500:windshield,windscreen +501:cat +502:cue,cue stick,pool cue,pool stick +503:vent,venthole,vent-hole,blowhole +504:videos +505:shovel +506:eaves +507:antenna,aerial,transmitting aerial +508:shipyard +509:hen,biddy +510:traffic cone +511:washing machines +512:truck crane +513:cds +514:niche +515:scoreboard +516:briefcase +517:boot +518:sweater,jumper +519:hay +520:pack +521:bottle rack +522:glacier +523:pergola +524:building materials +525:television camera +526:first floor +527:rifle +528:tennis table +529:stadium +530:safety belt +531:cover +532:dish rack +533:synthesizer +534:pumpkin +535:gutter +536:fruit stand +537:ice floe,floe +538:handle,grip,handgrip,hold +539:wheelchair +540:mousepad,mouse mat +541:diploma +542:fairground ride +543:radio +544:hotplate +545:junk +546:wheelbarrow +547:stream +548:toll plaza +549:punching bag +550:trough +551:throne +552:chair desk +553:weighbridge +554:extractor fan +555:hanging clothes +556:dish,dish aerial,dish antenna,saucer +557:alarm clock,alarm +558:ski lift +559:chain +560:garage +561:mechanical shovel +562:wine rack +563:tramway +564:treadmill +565:menu +566:block +567:well +568:witness stand +569:branch +570:duck +571:casserole +572:frying pan +573:desk organizer +574:mast +575:spectacles,specs,eyeglasses,glasses +576:service elevator +577:dollhouse +578:hammock +579:clothes hanging +580:photocopier +581:notepad +582:golf cart +583:footpath +584:cross +585:baptismal font +586:boiler +587:skip +588:rotisserie +589:tables +590:water mill +591:helmet +592:cover curtain +593:brick +594:table runner +595:ashtray +596:street box +597:stick +598:hangers +599:cells +600:urinal +601:centerpiece +602:portable fridge +603:dvds +604:golf club +605:skirting board +606:water cooler +607:clipboard +608:camera,photographic camera +609:pigeonhole +610:chips +611:food processor +612:post box +613:lid +614:drum +615:blender +616:cave entrance +617:dental chair +618:obelisk +619:canoe +620:mobile +621:monitors +622:pool ball +623:cue rack +624:baggage carts +625:shore +626:fork +627:paper filer +628:bicycle rack +629:coat rack +630:garland +631:sports bag +632:fish tank +633:towel dispenser +634:carriage +635:brochure +636:plaque +637:stringer +638:iron +639:spoon +640:flag pole +641:toilet brush +642:book stand +643:water faucet,water tap,tap,hydrant +644:ticket office +645:broom +646:dvd +647:ice bucket +648:carapace,shell,cuticle,shield +649:tureen +650:folders +651:chess +652:root +653:sewing machine +654:model +655:pen +656:violin +657:sweatshirt +658:recycling materials +659:mitten +660:chopping board,cutting board +661:mask +662:log +663:mouse,computer mouse +664:grill +665:hole +666:target +667:trash bag +668:chalk +669:sticks +670:balloon +671:score +672:hair spray +673:roll +674:runner +675:engine +676:inflatable glove +677:games +678:pallets +679:baskets +680:coop +681:dvd player +682:rocking horse +683:buckets +684:bread rolls +685:shawl +686:watering can +687:spotlights +688:post-it +689:bowls +690:security camera +691:runner cloth +692:lock +693:alarm,warning device,alarm system 
+694:side +695:roulette +696:bone +697:cutlery +698:pool balls +699:wheels +700:spice rack +701:plant pots,plant pot,flower pot,flowerpot,planter +702:towel ring +703:bread box +704:video +705:funfair +706:breads +707:tripod +708:ironing board +709:skimmer +710:hollow +711:scratching post +712:tricycle +713:file box +714:mountain pass +715:tombstones +716:cooker +717:card game,cards +718:golf bag +719:towel paper +720:chaise lounge +721:sun +722:toilet paper holder +723:rake +724:key +725:umbrella stand +726:dartboard +727:transformer +728:fireplace utensils +729:sweatshirts +730:cellular telephone,cellular phone,cellphone,cell,mobile phone +731:tallboy +732:stapler +733:sauna +734:test tube +735:palette +736:shopping carts +737:tools +738:push button,push,button +739:star +740:roof rack +741:barbed wire +742:spray +743:ear +744:sponge +745:racket +746:tins +747:eyeglasses +748:file +749:scarfs +750:sugar bowl +751:flip flop +752:headstones +753:laptop bag +754:leash +755:climbing frame +756:suit hanger +757:floor spotlight +758:plate rack +759:sewer +760:hard drive +761:sprinkler +762:tools box +763:necklace +764:bulbs +765:steel industry +766:club +767:jack +768:door bars +769:control panel,instrument panel,control board,board,panel +770:hairbrush +771:napkin holder +772:office +773:smoke detector +774:utensils +775:apron +776:scissors +777:terminal +778:grinder +779:entry phone +780:newspaper stand +781:pepper shaker +782:onions +783:central processing unit,cpu,central processor,processor,mainframe +784:tape +785:bat +786:coaster +787:calculator +788:potatoes +789:luggage rack +790:salt +791:street number +792:viewpoint +793:sword +794:cd +795:rowing machine +796:plug +797:andiron,firedog,dog,dog-iron +798:pepper +799:tongs +800:bonfire +801:dog dish +802:belt +803:dumbbells +804:videocassette recorder,vcr +805:hook +806:envelopes +807:shower faucet +808:watch +809:padlock +810:swimming pool ladder +811:spanners +812:gravy boat +813:notice board +814:trash bags +815:fire alarm +816:ladle +817:stethoscope +818:rocket +819:funnel +820:bowling pins +821:valve +822:thermometer +823:cups +824:spice jar +825:night light +826:soaps +827:games table +828:slotted spoon +829:reel +830:scourer +831:sleeping robe +832:desk mat +833:dumbbell +834:hammer +835:tie +836:typewriter +837:shaker +838:cheese dish +839:sea star +840:racquet +841:butane gas cylinder +842:paper weight +843:shaving brush +844:sunglasses +845:gear shift +846:towel rail +847:adding machine,totalizer,totaliser \ No newline at end of file diff --git a/mask_adapter/data/datasets/cityscapes_with_prompt_eng.txt b/mask_adapter/data/datasets/cityscapes_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..65fd30673d2008b09ea8dd5ea0be887506c422c4 --- /dev/null +++ b/mask_adapter/data/datasets/cityscapes_with_prompt_eng.txt @@ -0,0 +1,19 @@ +0:road,railroad +1:sidewalk,pavement +2:building,buildings,edifice,edifices,house,ceiling +3:wall,walls,brick wall,stone wall,tile wall,wood wall +4:fence,fences +5:pole,poles +6:traffic light,traffic lights +7:traffic sign,stop sign +8:vegetation,tree,trees,palm tree,bushes +9:terrain,river,sand,sea,snow,water,mountain,grass,dirt,rock +10:sky,clouds +11:person +12:rider +13:car,cars +14:truck,trucks +15:bus,buses +16:train,trains,locomotive,locomotives,freight train +17:motorcycle,motorcycles +18:bicycle,bicycles,bike,bikes \ No newline at end of file diff --git a/mask_adapter/data/datasets/coco_panoptic_with_prompt_eng.txt 
b/mask_adapter/data/datasets/coco_panoptic_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..76dc9729374dff8a314638c7a8e0ed0707c78328 --- /dev/null +++ b/mask_adapter/data/datasets/coco_panoptic_with_prompt_eng.txt @@ -0,0 +1,201 @@ +0:invalid_class_id +1:person,child,girl,boy,woman,man,people,children,girls,boys,women,men,lady,guy,ladies,guys,clothes +2:bicycle,bicycles,bike,bikes +3:car,cars +4:motorcycle,motorcycles +5:airplane,airplanes +6:bus,buses +7:train,trains,locomotive,locomotives,freight train +8:truck,trucks +9:boat,boats +10:traffic light +11:fire hydrant +12:invalid_class_id +13:stop sign +14:parking meter +15:bench,benches +16:bird,birds +17:cat,cats,kitties,kitty +18:dog,dogs,puppy,puppies +19:horse,horses,foal +20:sheep +21:cow,cows,calf +22:elephant,elephants +23:bear,bears +24:zebra,zebras +25:giraffe,giraffes +26:invalid_class_id +27:backpack,backpacks +28:umbrella,umbrellas +29:invalid_class_id +30:invalid_class_id +31:handbag,handbags +32:tie +33:suitcase,suitcases +34:frisbee +35:skis +36:snowboard +37:sports ball +38:kite,kites +39:baseball bat +40:baseball glove +41:skateboard +42:surfboard +43:tennis racket +44:bottle,bottles,water bottle +45:invalid_class_id +46:wine glass,wine glasses,wineglass +47:cup,cups,water cup,water glass +48:fork,forks +49:knife,knives +50:spoon,spoons +51:bowl,bowls +52:banana,bananas +53:apple,apples,apple fruit +54:sandwich,sandwiches +55:orange fruit +56:broccoli +57:carrot,carrots +58:hot dog +59:pizza +60:donut,donuts +61:cake,cakes +62:chair,chairs +63:couch,sofa,sofas +64:potted plant,potted plants,pottedplant,pottedplants,planter,planters +65:bed,beds +66:invalid_class_id +67:dining table,dining tables,diningtable,diningtables,plate,plates,diningtable tablecloth +68:invalid_class_id +69:invalid_class_id +70:toilet +71:invalid_class_id +72:tv +73:laptop +74:mouse +75:tv remote,remote control +76:keyboard +77:cell phone,mobile +78:microwave +79:oven,ovens +80:toaster +81:sink,sinks +82:refrigerator,fridge +83:invalid_class_id +84:book,books +85:clock +86:vase,vases +87:scissor,scissors +88:teddy bear,teddy bears +89:hair drier +90:toothbrush,toothbrushes +91:invalid_class_id +92:banner,banners +93:blanket,blankets +94:invalid_class_id +95:bridge +96:invalid_class_id +97:invalid_class_id +98:invalid_class_id +99:invalid_class_id +100:cardboard +101:invalid_class_id +102:invalid_class_id +103:invalid_class_id +104:invalid_class_id +105:invalid_class_id +106:invalid_class_id +107:counter +108:invalid_class_id +109:curtain,curtains +110:invalid_class_id +111:invalid_class_id +112:door,doors +113:invalid_class_id +114:invalid_class_id +115:invalid_class_id +116:invalid_class_id +117:invalid_class_id +118:wood floor +119:flower,flowers +120:invalid_class_id +121:invalid_class_id +122:fruit,fruits +123:invalid_class_id +124:invalid_class_id +125:gravel +126:invalid_class_id +127:invalid_class_id +128:house +129:invalid_class_id +130:lamp,bulb,lamps,bulbs +131:invalid_class_id +132:invalid_class_id +133:mirror +134:invalid_class_id +135:invalid_class_id +136:invalid_class_id +137:invalid_class_id +138:tennis net +139:invalid_class_id +140:invalid_class_id +141:pillow,pillows +142:invalid_class_id +143:invalid_class_id +144:platform +145:playingfield,tennis court,baseball field,soccer field,tennis field +146:invalid_class_id +147:railroad +148:river +149:road +150:invalid_class_id +151:roof +152:invalid_class_id +153:invalid_class_id +154:sand +155:sea,sea wave,wave,waves +156:shelf 
+157:invalid_class_id +158:invalid_class_id +159:snow +160:invalid_class_id +161:stairs +162:invalid_class_id +163:invalid_class_id +164:invalid_class_id +165:invalid_class_id +166:tent +167:invalid_class_id +168:towel +169:invalid_class_id +170:invalid_class_id +171:brick wall +172:invalid_class_id +173:invalid_class_id +174:invalid_class_id +175:stone wall +176:tile wall +177:wood wall +178:water +179:invalid_class_id +180:window blind +181:window +182:invalid_class_id +183:invalid_class_id +184:tree,trees,palm tree,bushes +185:fence,fences +186:ceiling +187:sky,clouds +188:cabinet,cabinets +189:table +190:floor,flooring,tile floor +191:pavement +192:mountain,mountains +193:grass +194:dirt +195:paper +196:food +197:building,buildings +198:rock +199:wall,walls +200:rug \ No newline at end of file diff --git a/mask_adapter/data/datasets/coco_stuff_with_prompt_eng.txt b/mask_adapter/data/datasets/coco_stuff_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..3ad453c5f577c05514d2aa2d71c1871778263971 --- /dev/null +++ b/mask_adapter/data/datasets/coco_stuff_with_prompt_eng.txt @@ -0,0 +1,183 @@ +0:invalid_class_id +1:person,child,girl,boy,woman,man,people,children,girls,boys,women,men,lady,guy,ladies,guys +2:bicycle,bicycles,bike,bikes +3:car,cars +4:motorcycle,motorcycles +5:airplane,airplanes +6:bus,buses +7:train,trains,locomotive,locomotives,freight train +8:truck,trucks +9:boat,boats +10:traffic light +11:fire hydrant +12:invalid_class_id +13:stop sign +14:parking meter +15:bench,benches +16:bird,birds +17:cat,cats,kitties,kitty +18:dog,dogs,puppy,puppies +19:horse,horses,foal +20:sheep +21:cow,cows,calf +22:elephant,elephants +23:bear,bears +24:zebra,zebras +25:giraffe,giraffes +26:invalid_class_id +27:backpack,backpacks +28:umbrella,umbrellas +29:invalid_class_id +30:invalid_class_id +31:handbag,handbags +32:tie +33:suitcase,suitcases +34:frisbee +35:skis +36:snowboard +37:sports ball +38:kite,kites +39:baseball bat +40:baseball glove +41:skateboard +42:surfboard +43:tennis racket +44:bottle,bottles,water bottle +45:invalid_class_id +46:wine glass,wine glasses,wineglass +47:cup,cups,water cup,water glass +48:fork,forks +49:knife,knives +50:spoon,spoons +51:bowl,bowls +52:banana,bananas +53:apple,apples,apple fruit +54:sandwich,sandwiches +55:orange,oranges,orange fruit +56:broccoli +57:carrot,carrots +58:hot dog +59:pizza +60:donut,donuts +61:cake,cakes +62:chair,chairs +63:couch,sofa,sofas +64:potted plant,potted plants,pottedplant,pottedplants,planter,planters +65:bed,beds +66:invalid_class_id +67:dining table,dining tables,diningtable,diningtables,plate,plates,diningtable tablecloth +68:invalid_class_id +69:invalid_class_id +70:toilet +71:invalid_class_id +72:tv +73:laptop +74:mouse +75:remote,tv remote,remote control +76:keyboard +77:cell phone,mobile +78:microwave +79:oven,ovens +80:toaster +81:sink,sinks +82:refrigerator,fridge +83:invalid_class_id +84:book,books +85:clock +86:vase,vases +87:scissors,scissor +88:teddy bear,teddy bears +89:hair drier +90:toothbrush,toothbrushes +91:invalid_class_id +92:banner,banners +93:blanket,blankets +94:branch +95:bridge +96:building,buildings +97:bush,bushes +98:cabinet,cabinets +99:cage,cages +100:cardboard +101:carpet,carpets +102:ceiling-other,ceiling +103:ceiling-tile,ceiling tile +104:cloth +105:clothes +106:clouds +107:counter +108:cupboard,cupboards +109:curtain,curtains +110:desk-stuff,desk,desks +111:dirt +112:door-stuff,door,doors +113:fence,fences +114:floor-marble,marble floor,floor marble 
+115:floor-other,floor +116:floor-stone,stone floor,floor stone +117:floor-tile,tile floor,floor tile +118:floor-wood,wood floor,floor wood +119:flower,flowers +120:fog +121:food-other,food +122:fruit,fruits +123:furniture-other,furniture +124:grass +125:gravel +126:ground-other,ground +127:hill +128:house +129:leaves +130:light +131:mat +132:metal +133:mirror-stuff,mirror +134:moss +135:mountain,mountains +136:mud +137:napkin +138:net +139:paper +140:pavement +141:pillow,pillows +142:plant-other +143:plastic +144:platform +145:playingfield,tennis court,baseball field,soccer field,tennis field +146:railing +147:railroad +148:river +149:road +150:rock +151:roof +152:rug +153:salad +154:sand +155:sea,sea wave,wave,waves +156:shelf +157:sky-other,sky +158:skyscraper +159:snow +160:solid-other,solid +161:stairs +162:stone +163:straw +164:structural-other,structural +165:table +166:tent +167:textile-other,textile +168:towel +169:tree,trees,palm tree +170:vegetable +171:wall-brick,brick wall,wall brick +172:wall-concrete,concrete wall,wall concrete +173:wall-other,wall +174:wall-panel,wall panel,panel wall +175:wall-stone,stone wall,wall stone +176:wall-tile,wall tile,tile wall +177:wall-wood,wood wall, wall wood +178:water-other,water +179:waterdrops +180:window-blind,window blind +181:window-other,window +182:wood \ No newline at end of file diff --git a/mask_adapter/data/datasets/grand_with_prompt_eng.txt b/mask_adapter/data/datasets/grand_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..248010c41232e6927ca3db568beae60acd9de939 --- /dev/null +++ b/mask_adapter/data/datasets/grand_with_prompt_eng.txt @@ -0,0 +1,5252 @@ +1:antenna,antennas +2:clock_tower +3:tower,towers +4:clock +5:house,houses +6:life_buoy +7:sail,sails +8:raft,rafts +9:oar +10:life_jacket +11:hat,hats +12:Boats,boat,boats +13:paddle,paddles +14:person,persons +15:rowboat,rowboats +16:sculpture,sculptures +17:castle +18:train,train_,trains +19:pipe,pipes +20:vent,vents +21:camera,cameras +22:signboard +23:lightbulb +24:poster,posters +25:bench +26:traffic_light +27:ceiling,ceilings +28:station,stations +29:city +30:plaza +31:Statue,Statues,statue,statue_,statues +32:flowerpot +33:flagpole +34:jacket,jackets +35:view,views +36:scene,scenes +37:taillight +38:Cars,car,car_,cars +39:awning,awnings +40:headlight,headlights +41:cone,cones +42:motorcycle,motorcycles +43:motor_scooter +44:streetlight,streetlights +45:Traffic,traffic +46:building,buildings +47:mall +48:cathedral +49:camel,camels +50:backpack,backpacks +51:sweatshirt +52:pigeon,pigeons +53:road +54:group +55:Mountains,mountain,mountains +56:balloon,balloons +57:gym +58:air_conditioner +59:street_sign +60:pole,poles +61:Trees,tree,trees +62:grass +63:travel +64:bicycle,bicycles +65:bike,bikes +66:Street,street,streets +67:roof,roofs +68:costume,costumes +69:parade +70:Crowds,crowd,crowds +71:event,events +72:mask,masks +73:blanket,blankets +74:sandal,sandal_,sandals +75:banner,banners +76:cap,cap_,caps +77:minivan +78:bracelet,bracelets +79:flip-flop,flip-flop_,flip-flops +80:frisbee +81:shirt,shirts +82:headscarf +83:windshield_wiper +84:telephone_pole +85:platform +86:railway,railways +87:airplane,airplanes +88:formation,formations +89:trash_can +90:jean,jeans +91:boardwalk +92:cincture +93:march +94:dress +95:flag,flags +96:trouser,trousers +97:belt +98:Shoes,shoe,shoes +99:umbrella,umbrellas +100:parking +101:book,books +102:library +103:milk_can +104:log,logs +105:bottle,bottles +106:bottle_cap +107:water_bottle +108:garbage 
+109:plastic_bag +110:plastic +111:camping +112:campsite +113:beach +114:scarf +115:shopping_bag +116:sport,sports +117:handbag,handbags +118:helmet,helmets +119:tote_bag +120:rearview_mirror +121:wheel,wheels +122:grill +123:coat,coats +124:race_car +125:vehicle,vehicles +126:sky +127:red +128:serene +129:river +130:sock +131:legging,legging_,leggings +132:shoulder_bag +133:blouse +134:short_pants +135:telephone +136:lamppost +137:strap +138:park +139:People,people +140:fence +141:tall +142:Church,church +143:scooter,scooters +144:motorbike,motorbikes +145:motorcyclist,motorcyclists +146:shop,shops +147:Rows,row,rows +148:carnival +149:water,waters +150:Protest,protest +151:sign,signs +152:insect,insects +153:night +154:mosque,mosques +155:keyboard +156:cell_phone +157:computer +158:warehouse +159:potted_plant +160:palace +161:beautiful +162:tie +163:girl,girls +164:plane,planes +165:cart,carts +166:truck,trucks +167:load +168:terminal +169:tarmac +170:Airport,airport +171:golf +172:Newspapers,newspaper,newspapers +173:headband +174:tank_top_ +175:column,columns +176:wrench +177:lanyard +178:orange,orange_,oranges +179:chair,chairs +180:hose,hoses +181:runner,runner_,runners +182:Robots,robot,robots +183:knee_pad +184:convention +185:bamboo +186:jersey,jerseys +187:toy,toys +188:glass +189:dog,dogs +190:blue +191:day,days +192:gazebo,gazebos +193:deck_chair +194:towel,towels +195:swimwear +196:goggles +197:swimsuit,swimsuits +198:swimmer,swimmers +199:pool +200:bustling +201:necklace +202:thread +203:curtain +204:bowl,bowls +205:wig +206:loom +207:Construction,construction +208:barricade,barricades +209:crate,crates +210:curb +211:office +212:dinghy +213:waterway +214:bucket,buckets +215:sunhat +216:dumpster +217:area,areas +218:bandanna +219:rickshaw,rickshaws +220:ride,rides +221:crowded +222:arena +223:ring,rings +224:billboard,billboards +225:signage +226:license_plate +227:lot,lots +228:propeller +229:rural +230:field,fields +231:urban +232:hot-air +233:hot-air_balloon +234:tag +235:scoreboard +236:baseball_cap +237:race +238:smartphone,smartphones +239:wooden +240:tram +241:Tourists,tourist,tourists +242:tarp,tarps +243:nest +244:village +245:door,doors +246:entrance,entrances +247:watch +248:spotlight,spotlights +249:polo +250:timer +251:sweat_pants +252:manhole +253:table,tables +254:giant_panda +255:refrigerator +256:Ruins,ruin,ruins +257:horse,horses +258:stamp,stamps +259:fishing_rod +260:thermos_bottle +261:coconut,coconuts +262:dirt +263:trail +264:battery +265:solar_array +266:barge,barges +267:solar +268:array +269:water_ski +270:wet +271:staircase +272:picturesque +273:body +274:steering_wheel +275:machine,machines +276:suitcase,suitcases +277:jeep,jeeps +278:mirror +279:skirt,skirts +280:lake +281:sun +282:drum,drum_,drums +283:maraca +284:scissor,scissors +285:environment +286:dining_table +287:crossbar +288:vest,vests +289:obstacle +290:cup,cups +291:hamburger +292:food,foods +293:Dixie_cup +294:beef,beef_,beefs +295:reflector +296:icecream +297:bun +298:Ice,ice +299:rink +300:painting,paintings +301:courtyard +302:land +303:large +304:jet,jets +305:breathtaking +306:bus,bus_ +307:light,lights +308:ferry +309:squirrel +310:tail +311:distance +312:ball,balls +313:doorknob +314:fun +315:monitor,monitor_,monitors +316:knob +317:vending_machine +318:woman +319:raincoat +320:segway +321:puppet +322:bird,birds +323:crucifix +324:sports_ball +325:palm +326:walkway,walkways +327:highway,highways +328:skyscraper,skyscrapers +329:button,buttons +330:tire,tires +331:snow 
+332:Protesters,protester,protesters +333:baby_buggy +334:bow,bow_,bows +335:box +336:doll,dolls +337:stroller,strollers +338:photo,photos +339:skier,skiers +340:ski,skis +341:slope,slopes +342:fishing +343:pelican,pelicans +344:canal,canals +345:radar +346:theater,theaters +347:museum +348:cat,cats +349:window,windows +350:skyline +351:island,islands +352:surfboard,surfboards +353:map +354:rabbit +355:almond +356:carton,cartons +357:cookie,cookies +358:bell,bells +359:stop_sign +360:weathervane +361:banana,bananas +362:number +363:scenery +364:cargo +365:Cows,cow,cows +366:polo_shirt +367:handle,handles +368:equipment +369:display,displays +370:show,shows +371:Monument,monument +372:hillside +373:washer +374:automatic_washer +375:van,vans +376:ice_maker +377:helicopter,helicopters +378:christmas +379:tennis +380:tray +381:stool,stools +382:dish_antenna +383:silo,silos +384:restaurant,restaurants +385:cuisine +386:scale,scale_,scales +387:market,markets +388:vegetable,vegetables +389:cowboy_hat +390:Fans,fan,fans +391:basket,baskets +392:tomato +393:carrot,carrots +394:crabmeat +395:potato +396:cauliflower +397:sausage +398:container,containers +399:tong,tongs +400:lettuce +401:dishes +402:chopstick,chopsticks +403:jar,jars +404:straw,straw_,straws +405:pot,pots +406:spoon,spoons +407:headboard +408:lampshade +409:drawer +410:bed,beds +411:vase,vases +412:pillow,pillows +413:lamp,lamps +414:chandelier,chandeliers +415:candle,candles +416:flower_arrangement +417:mattress +418:dresser +419:living +420:furniture +421:ship,ships +422:dock +423:ocean +424:float,floats +425:slide,slides +426:thrilling +427:pant,pants +428:cigarette_case +429:mast +430:apple,apples +431:buoy +432:sunglasses +433:produce +434:television_set +435:kite,kites +436:tv,tvs +437:duffel_bag +438:dishwasher_detergent +439:apron +440:duck,ducks +441:photograph +442:bow-tie +443:cruise,cruises +444:cargo_ship +445:port +446:rifle,rifles +447:cattle +448:yoke_ +449:beanie +450:splash +451:flood,floods +452:mushroom +453:stump +454:moped +455:license +456:beret +457:turban,turbans +458:wagon +459:miniature +460:Ferris_wheel +461:cab,cab_,cabs +462:trash +463:tour +464:amusement +465:wine_glass +466:wine,wines +467:outdoor,outdoors +468:star,stars +469:volleyball +470:pouch +471:stadium +472:safety +473:can,cans +474:hinge +475:glove,gloves +476:carpenter +477:lush +478:lantern,lanterns +479:gargoyle +480:variety +481:aircraft +482:fighter,fighters +483:roller_skate +484:Rollerblade +485:skateboard +486:activities +487:training +488:sled,sleds +489:winter +490:family +491:stone +492:magazine,magazines +493:booklet +494:capabilities +495:showcase +496:fighter_jet +497:cabana +498:forklift +499:line,lines +500:dark +501:vibrant +502:folding_chair +503:speaker,speaker_,speakers +504:Flowers,flower,flowers +505:intersection,intersections +506:soccer_ball +507:soccer +508:charming +509:sunset +510:calm +511:golfcart +512:eagle +513:obelisk,obelisks +514:memorial +515:pickup +516:gauge,gauges +517:bolt +518:thermometer +519:pressure +520:backdrop +521:kitchen +522:stove +523:sink,sinks +524:faucet +525:cabinet,cabinets +526:counter +527:cruise_ship +528:cloudy +529:chef,chefs +530:flap +531:spectacle,spectacles +532:skies +533:motor_vehicle +534:coffee +535:seine +536:film +537:kuala +538:crutch +539:grocery_bag +540:sunbathe +541:sand,sands +542:tank,tank_,tanks +543:cylinder,cylinders +544:tablecloth,tablecloths +545:tartan +546:coat_hanger +547:canopy +548:bazaar +549:roller +550:knee +551:pavement +552:store,stores +553:groom 
+554:wedding +555:phone,phones +556:crown +557:Christmas_tree +558:paper +559:crosswalk +560:neon +561:crane,cranes +562:landing +563:plume +564:carry +565:pepper,peppers +566:plate,plates +567:asparagus +568:shrimp +569:landscape +570:hook +571:parrot,parrots +572:marina +573:harbor +574:doormat +575:shrine,shrines +576:choker +577:women +578:padlock +579:magnet +580:reflection,reflections +581:biker,bikers +582:cyclist,cyclists +583:shawl +584:cover +585:taxi,taxis +586:ceremony +587:tiara +588:veil,veils +589:bouquet,bouquets +590:man +591:mudslide +592:rock,rocks +593:seat,seats +594:brass_plaque +595:banknote +596:pile,piles +597:sum +598:tent,tents +599:walk +600:fork +601:cotton +602:teapot +603:dessert,desserts +604:board,boards +605:suit,suit_,suits +606:buddha +607:cloud,clouds +608:fire_hydrant +609:pottery +610:mug,mugs +611:teacup +612:sunflower,sunflowers +613:cottage +614:garden,gardens +615:diaper,diapers +616:front +617:holiday +618:fountain,fountains +619:boot,boots +620:contrast +621:cannon,cannons +622:sticker +623:robe,robes +624:shark,sharks +625:seagull,seagulls +626:flock +627:armband +628:earring,earrings +629:jewelry +630:mannequin,mannequins +631:stand,stands +632:atm,atms +633:pillar,pillars +634:town +635:tugboat +636:bridge,bridges +637:microphone +638:organization +639:alligator +640:broom +641:papaya +642:candle_holder +643:close +644:ambulance,ambulances +645:spear +646:festival +647:cliff,cliffs +648:stupa,stupas +649:ladder,ladders +650:home,homes +651:necktie +652:cougar +653:blinker +654:fish,fish_,fishs +655:shopper,shoppers +656:auditorium +657:orchard +658:pickup_truck +659:junkyard +660:knocker,knocker_,knockers +661:couch +662:telescope,telescopes +663:barrel,barrels +664:mail_slot +665:strawberry +666:iphone,iphones +667:snack,snacks +668:business_card +669:ferris +670:fair +671:motorboat +672:bay +673:calculator +674:dancer,dancers +675:audience +676:weapon,weapons +677:army +678:wagon_wheel +679:shovel +680:tight,tights,tights_ +681:bunch +682:children +683:cozy +684:lone +685:airliner,airliners +686:takeoff +687:canoe,canoes +688:armchair +689:room +690:shelf +691:passenger_ship +692:desk,desks +693:cobblestone +694:skewer,skewers +695:bin +696:seafood +697:bag,bags +698:balcony +699:card,cards +700:silver +701:pair,pairs +702:drumstick +703:mitten +704:strainer +705:wok +706:quilt,quilts +707:piece,pieces +708:playground +709:shopping +710:birdhouse +711:sydney +712:bobbin +713:product,products +714:factory +715:lion,lions +716:mascot +717:handcart +718:passenger_car_ +719:decker +720:pedestrian,pedestrians +721:spire,spires +722:driveway +723:balustrade +724:duomo +725:package,packages +726:lamb-chop +727:rib,rib_,ribs +728:bottle_opener +729:Meat,meat,meats +730:picnic +731:baby +732:floor,floors +733:tractor,tractor_,tractors +734:hay +735:flannel +736:carriage,carriages +737:shield,shields +738:solemn +739:interior +740:cellphone +741:garage,garages +742:rooftop,rooftops +743:guitar,guitars +744:path +745:window_box_ +746:pad,pads +747:tennis_racket +748:whale +749:mosaic,mosaics +750:tile,tiles +751:colorful +752:dragon +753:top +754:notre +755:pinecone +756:bowler_hat +757:teddy_bear +758:figurine,figurines +759:cushion,cushions +760:bookcase +761:stall,stalls +762:clothing +763:officer,officers +764:business +765:binocular,binoculars +766:leather +767:stick,sticks +768:projectile,projectile_,projectiles +769:garden_hose +770:briefcase +771:Police,police +772:demonstration +773:tripod +774:overpass +775:motor +776:bulletproof_vest 
+777:camouflage +778:dress_suit +779:instrument,instruments +780:back,backs +781:cook +782:pan,pan_,pans +783:curry +784:sari,saris +785:desert +786:wristlet +787:team,teams +788:shack +789:hut,huts +790:gravestone,gravestones +791:postcard +792:shirtless +793:chapel +794:drop +795:fruit,fruits +796:procession +797:deity +798:worship +799:salami +800:salad +801:bridal_gown +802:liquor +803:bread +804:cucumber +805:blazer +806:flute_glass +807:platter +808:wineglass +809:broccoli +810:automobile +811:Bible,bible +812:bronze +813:elephant,elephants +814:trainer +815:two +816:alley +817:porch +818:plaid +819:parasol +820:lighthouse +821:slipper,slipper_,slippers +822:campfire +823:bonfire +824:graffiti +825:short,shorts +826:cable,cables +827:rice +828:eggplant,eggplants +829:vendor,vendors +830:clothes_hamper +831:many +832:river_boat +833:pig,pigs +834:fairy +835:wingspan +836:gadget,gadgets +837:burger +838:teddy +839:carousel +840:coin +841:parachute +842:gas +843:Soldiers,soldier,soldiers +844:uniform,uniforms +845:puffin +846:beverage,beverages +847:identity_card +848:sofa +849:alcohol +850:ashtray +851:drink,drinks +852:party +853:ostrich +854:hog +855:horned_cow +856:rearview +857:trailer,trailers +858:swim +859:location,locations +860:sweater +861:windmill,windmills +862:wind +863:steeple,steeples +864:gathering +865:walking_stick +866:palette +867:pallet,pallets +868:trunk,trunks +869:sunshade +870:yacht,yachts +871:spice,spices +872:fill +873:collection +874:tin +875:power +876:credit +877:bull,bulls +878:surrounding,surroundings +879:ikea +880:latch +881:gate,gates +882:la,las +883:snowy +884:wall,walls +885:musician,musicians +886:napkin,napkins +887:lemon,lemons +888:stirrup +889:milestone +890:spectator +891:sphere +892:laptop_computer +893:plant,plants +894:urn +895:stair,stairs +896:waterfront +897:shutter,shutters +898:trailer_truck +899:perspective +900:forest +901:step,steps +902:football,football_,footballs +903:fabric,fabrics +904:plaque,plaques +905:walking_cane +906:sidewalk +907:award,awards +908:pocket_watch +909:dashboard +910:windshield +911:rainy +912:roman +913:basketball +914:hike +915:earphone +916:wire,wires +917:bullhorn +918:rocket,rockets +919:ironing_board +920:dove +921:racecar +922:nascar +923:archery +924:laboratory +925:condiment,condiments +926:chili,chili_,chilis +927:crisp,crisp_,crisps +928:supermarket +929:step_stool +930:farmer,farmers +931:printer +932:wall_socket +933:belt_buckle +934:trade +935:green_onion +936:brussels_sprouts +937:daikon +938:celery +939:shampoo,shampoos +940:retail +941:three +942:dunkin +943:pedestal +944:bicyclist +945:barrier,barriers +946:label +947:aerosol_can +948:cleansing_agent +949:trophy +950:switch +951:circuit,circuits +952:meter,meters +953:panel,panels +954:payphone,payphones +955:beauty +956:square +957:donut,donuts +958:cape +959:pond +960:speedboat +961:calendar +962:lawn +963:pennant +964:toolbox +965:cityscape,cityscapes +966:pin,pin_,pins +967:chopping_board +968:Croissants,croissant,croissants +969:crescent +970:pastry +971:tassel +972:headdress +973:skateboarder,skateboarders +974:skate +975:remote +976:laptop,laptops +977:ladybug +978:knife +979:men +980:hair +981:ramp,ramps +982:cross +983:knight,knights +984:sword,swords +985:hot_dog +986:details +987:smoky +988:gargle +989:detergent +990:cast +991:lounge,lounges +992:bagel +993:onion,onions +994:parsley +995:drone +996:footage +997:curve,curves +998:prawn +999:textile,textiles +1000:gasmask +1001:pyramid,pyramids +1002:adobe +1003:broach 
+1004:pocket +1005:key +1006:classroom +1007:bush +1008:wetsuit,wetsuits +1009:barrette +1010:moment,moments +1011:machine_gun +1012:buffet +1013:life +1014:fleet +1015:wiper +1016:nightstand +1017:bedroom +1018:tinsel +1019:basketball_backboard +1020:ballet +1021:royal +1022:tea +1023:tape,tape_,tapes +1024:easel +1025:pathway +1026:wheelchair +1027:frying_pan +1028:water_tower +1029:blackboard +1030:bar,bars +1031:swamp +1032:trafalgar +1033:sewing_machine +1034:workbench +1035:workshop +1036:vessel +1037:fries +1038:brick,bricks +1039:alleyway +1040:bathtub +1041:sack +1042:garbage_truck +1043:air +1044:ski_parka +1045:sailboat,sailboats +1046:rose,roses +1047:concept +1048:middle +1049:picture,pictures +1050:sugarcane,sugarcane_,sugarcanes +1051:jewellery +1052:paintbrush +1053:saxophone +1054:performance +1055:ivy +1056:ottoman +1057:patio +1058:planet +1059:bandage +1060:pergola +1061:notebook,notebooks +1062:heart +1063:touch +1064:opening +1065:cowboy,cowboys +1066:track,tracks +1067:deer +1068:mural,murals +1069:coffee_maker +1070:saucer +1071:place_mat +1072:trolley +1073:rubber_band +1074:protestor +1075:pier +1076:hill,hills +1077:altar +1078:archway +1079:cellular_telephone +1080:pen +1081:background +1082:suspenders +1083:bride +1084:cufflink +1085:metal +1086:net,nets +1087:tow_truck +1088:telegraph +1089:dolphin,dolphins +1090:giraffe,giraffes +1091:lumber +1092:fort +1093:marketplace +1094:wheelbarrow +1095:barrow +1096:gardenia +1097:horse_buggy +1098:anklet +1099:rail,rails +1100:mat,mat_,mats +1101:hockey +1102:envelope +1103:camper,camper_,campers +1104:vintage +1105:outcrop +1106:water_jug +1107:bear +1108:pink +1109:thermostat +1110:cornice +1111:bulletin +1112:bakery +1113:army_tank +1114:carpet,carpets +1115:horse_carriage +1116:lightning_rod +1117:lagoon +1118:resort +1119:thrill +1120:pew,pew_,pews +1121:presentation,presentations +1122:trench +1123:oven +1124:cooking +1125:telephone_booth +1126:tricycle +1127:jam,jams +1128:peanut,peanuts +1129:memory +1130:outhouse +1131:luggage +1132:kayak,kayaks +1133:arrangement,arrangements +1134:head,heads +1135:game,games +1136:fire_extinguisher +1137:shopping_cart +1138:speed +1139:toothpick +1140:bikini +1141:art,arts +1142:Swans,swan,swans +1143:cardigan +1144:notepad +1145:morning +1146:kimono,kimonos +1147:plantation +1148:structure,structures +1149:cherry +1150:sense +1151:terrace +1152:wreath +1153:footwear +1154:cake,cakes +1155:buy +1156:warrior,warriors +1157:pumpkin,pumpkins +1158:indoor +1159:mother +1160:icon,icons +1161:jumpsuit +1162:sleeping_bag +1163:yoga +1164:peaceful +1165:intricate +1166:colosseum +1167:glow +1168:deck +1169:cooler,cooler_,coolers +1170:wet_suit +1171:camp +1172:neckerchief +1173:baseball +1174:remembrance +1175:saddlebag +1176:turtleneck_ +1177:cop +1178:postbox,postbox_,postboxs +1179:mailbox,mailbox_,mailboxs +1180:podium +1181:object,objects +1182:lotus +1183:pizza,pizzas +1184:hammer +1185:cigarette,cigarettes +1186:litter +1187:messy +1188:hallway +1189:camera_lens +1190:tablet,tablets +1191:wallet +1192:drill +1193:skull +1194:passport,passports +1195:pagoda,pagodas +1196:armor +1197:model,models +1198:destination,destinations +1199:meal,meals +1200:purple +1201:aquarium,aquariums +1202:atv,atvs +1203:muddy +1204:exterior,exteriors +1205:sheep +1206:pinwheel +1207:skullcap +1208:autumn +1209:customer,customers +1210:item,items +1211:soda +1212:wicker +1213:sit +1214:gun +1215:harmonium +1216:organ +1217:brassiere +1218:waist +1219:corset +1220:gondola,gondola_,gondolas 
+1221:globe +1222:zoo +1223:dance +1224:sportswear +1225:keycard +1226:coverall +1227:Workers,worker,workers +1228:saddle_blanket +1229:surface,surfaces +1230:coke +1231:court,courts +1232:cardboard +1233:Catch,catch +1234:coffee_table +1235:gravy_boat +1236:seabird +1237:project,projects +1238:gown,gowns +1239:diploma,diplomas +1240:rescue +1241:horseback +1242:mesh +1243:tapestry +1244:frescoes +1245:rally +1246:clipboard +1247:subway +1248:seashell +1249:blueberry +1250:muffin +1251:raspberry +1252:opera +1253:mix +1254:exhibition +1255:exhibit,exhibits +1256:patch +1257:toothbrush +1258:salon +1259:hospital +1260:grape,grapes +1261:design,designs +1262:duct_tape +1263:couple +1264:bagpipe +1265:dress_hat +1266:cornet +1267:toaster +1268:television,televisions +1269:sombrero +1270:atlantis +1271:pistol +1272:troop +1273:stylus +1274:baseball_bat +1275:black +1276:smoker +1277:cantaloup +1278:cantaloupe +1279:melon,melons +1280:power_shovel +1281:bulldozer +1282:pub +1283:snowman +1284:owl +1285:sandy +1286:jockey,jockeys +1287:lorry +1288:intriguing +1289:kamps +1290:auto +1291:interesting +1292:reindeer +1293:goat,goats +1294:animal,animals +1295:herd +1296:visor +1297:marathon +1298:Apartment,apartment,apartments +1299:bank,banks +1300:karaoke +1301:speedometer +1302:stop +1303:medicine +1304:several +1305:poncho +1306:hedge,hedges +1307:bowling_ball +1308:parcel +1309:doughnut,doughnuts +1310:convenience +1311:goods +1312:fresco +1313:estate +1314:trophy_cup +1315:fire_alarm +1316:easter +1317:egg,eggs +1318:scaffolding +1319:performer +1320:ski_boot +1321:handgun +1322:dozer +1323:excavator,excavators +1324:lexus +1325:pie,pies +1326:job +1327:brown +1328:mouse,mouse_,mouses +1329:projector +1330:railing,railings +1331:rocky +1332:concert +1333:gull +1334:options +1335:grocery +1336:artwork +1337:escalator,escalators +1338:mansion +1339:luxury +1340:python +1341:bonnet +1342:frame +1343:evening +1344:portrait +1345:rio +1346:round +1347:money +1348:grassy +1349:stirrer +1350:caldron +1351:setting +1352:grille +1353:celebration +1354:logo,logos +1355:dome,domes +1356:snowmobile,snowmobiles +1357:kettle +1358:mallet +1359:rag +1360:koala +1361:closet +1362:canister +1363:remote_control +1364:hassock +1365:hotel,hotels +1366:ledge +1367:mop +1368:t +1369:doorway,doorways +1370:webcam +1371:johnsons +1372:temple,temples +1373:shipping +1374:postage +1375:image,images +1376:ground,grounds +1377:birdbath +1378:toilet +1379:arrow,arrows +1380:dish +1381:fire,fires +1382:otherwise +1383:attire +1384:lollipop +1385:candy +1386:peach +1387:plum,plums +1388:apricot +1389:beard +1390:bead,beads +1391:cosmetic,cosmetics +1392:cube,cubes +1393:princess +1394:underwear +1395:wrestling +1396:lab_coat +1397:chinaware +1398:appliance,appliances +1399:hardwood +1400:space,spaces +1401:cane +1402:wrought +1403:vending +1404:school +1405:nature +1406:yoghurt +1407:dispenser +1408:mound,mound_,mounds +1409:singapore +1410:stack,stacks +1411:circle +1412:milk +1413:cock +1414:sleepwear +1415:heartwarming +1416:boy,boys +1417:jewel +1418:boxing +1419:theme +1420:currency +1421:glasses +1422:iPod,ipod +1423:journal +1424:workspace +1425:ram,ram_,rams +1426:saddle,saddle_,saddles +1427:noseband_ +1428:equestrian +1429:sunny +1430:passenger,passengers +1431:ancient +1432:festive +1433:architecture +1434:radiator +1435:lesson +1436:computer_keyboard +1437:measuring_stick +1438:marker +1439:pencil +1440:pop,pop_,pops +1441:receipt +1442:pineapple,pineapples +1443:toothpaste +1444:scraper +1445:surfer,surfers 
+1446:offerings +1447:turbine,turbines +1448:material,materials +1449:parking_meter +1450:blinder,blinder_,blinders +1451:Aisles,aisle,aisles +1452:stepladder +1453:bathroom +1454:off +1455:warning +1456:promenade +1457:tux +1458:tool,tools +1459:wine_bucket +1460:Chicken,chicken,chicken_,chickens +1461:radio_receiver +1462:tobacco_pipe +1463:wall_clock +1464:fireplace +1465:dollar +1466:goal +1467:microwave +1468:soap +1469:bubble,bubbles +1470:megaphone +1471:purse +1472:player,players +1473:play +1474:dunk +1475:bargains +1476:pulpit +1477:bell_pepper +1478:escape +1479:bison +1480:buffalo +1481:suv +1482:lavender +1483:headphone +1484:company +1485:wooden_leg +1486:radio +1487:face +1488:drive +1489:unique +1490:corner +1491:engine,engines +1492:text +1493:liquidity +1494:icing +1495:florist +1496:dustpan +1497:campus +1498:newsstand +1499:hold +1500:lab +1501:cloak +1502:priority +1503:hindu +1504:flashlight +1505:locker +1506:wave,waves +1507:monster +1508:soup +1509:charity +1510:hammock,hammocks +1511:stroll +1512:lift,lifts +1513:sandwich +1514:greenery +1515:wreckage +1516:stream +1517:course +1518:marble +1519:midst +1520:craftsmanship +1521:word,words +1522:turnip +1523:tunnel +1524:destroyer +1525:greenhouse,greenhouses +1526:data +1527:buses +1528:young +1529:easyjet +1530:samsung +1531:somber +1532:cemetery +1533:showroom +1534:flavor,flavors +1535:referee +1536:bleacher,bleachers +1537:monkey,monkeys +1538:ski_pole +1539:snowboarder +1540:clutch +1541:tokyo +1542:astronaut,astronauts +1543:decoration,decorations +1544:runway +1545:perfume +1546:bookshelf +1547:tennis_ball +1548:eiffel +1549:guitarist +1550:stunt +1551:video +1552:lime +1553:fruit_juice +1554:Lego,lego +1555:alarm_clock +1556:bedding +1557:bedspread +1558:leaves +1559:saw +1560:plank +1561:buyuk +1562:bodyboard +1563:chocolate_bar +1564:chocolate +1565:sale +1566:wear +1567:parliament +1568:nightclub +1569:headset +1570:menu +1571:chalkboard +1572:fire_hose +1573:foundation +1574:elk +1575:bullet_train +1576:angel,angels +1577:queue +1578:osteria +1579:pitcher,pitcher_,pitchers +1580:beer_bottle +1581:place,places +1582:treat,treats +1583:turtle,turtles +1584:fairground +1585:cracker,crackers +1586:biscuit +1587:buddhist +1588:experience +1589:cymbal +1590:chariot +1591:acorn,acorns +1592:walnut,walnuts +1593:screwdriver +1594:screen,screens +1595:ruler +1596:yogurt +1597:cornmeal +1598:color,colors +1599:overall,overalls,overalls_ +1600:fisherman +1601:nursery +1602:donkey,donkeys +1603:razor +1604:app,apps +1605:fireplug +1606:dishrag +1607:mule +1608:shed +1609:planter,planters +1610:shelter +1611:icloud +1612:outside +1613:dealership,dealerships +1614:truffle,truffle_,truffles +1615:fudge +1616:sea +1617:history +1618:t-shirt +1619:facade,facades +1620:stilt,stilts +1621:toilet_tissue +1622:lid,lids +1623:jacuzzi +1624:bedcover +1625:generator +1626:weather +1627:airshow +1628:conference +1629:shade +1630:reef +1631:biplane +1632:penguin,penguins +1633:green,greens +1634:beer_can +1635:series +1636:beer,beers +1637:style,styles +1638:mermaid +1639:laundromat +1640:trombone +1641:trumpet,trumpets +1642:bass +1643:gorilla +1644:firework,fireworks +1645:graduates +1646:atrium +1647:snowboard +1648:information +1649:side,sides +1650:treatment +1651:certificate +1652:placard +1653:steak,steak_,steaks +1654:swing,swings +1655:tachometer +1656:sushi +1657:tuna +1658:saltshaker +1659:trampoline +1660:wheelie +1661:bulletin_board +1662:hairnet +1663:medical +1664:surgeons +1665:cricket,crickets 
+1666:cabbage,cabbages +1667:fiat +1668:chain +1669:eerie +1670:bass_horn +1671:band +1672:kilt +1673:drummer +1674:mesmerizing +1675:marigold +1676:thumbtack +1677:router,router_,routers +1678:paper_towel +1679:microscope,microscopes +1680:daybed +1681:storefront,storefronts +1682:branch +1683:ballet_skirt +1684:chess +1685:mobile +1686:gingerbread +1687:pharmacy +1688:wendy,wendys +1689:cryptocurrency +1690:lip_balm +1691:bee,bees +1692:bug +1693:crack +1694:attention +1695:miniskirt +1696:gift_wrap +1697:seaplane +1698:dining +1699:rain +1700:wine_bottle +1701:lunch +1702:monk,monks +1703:cave +1704:mausoleum +1705:banco +1706:bath,baths +1707:bat,bat_,bats +1708:halloween +1709:sidecar +1710:plywood +1711:cloth +1712:nike +1713:geranium,geraniums +1714:blackberry +1715:berry +1716:tart +1717:ladle +1718:façade +1719:time,times +1720:binder +1721:racer +1722:gravel +1723:brandenburger +1724:reed,reeds +1725:craft,crafts +1726:crossing +1727:open +1728:afterpay +1729:landfill +1730:shoreline +1731:racket,rackets +1732:activity +1733:blimp +1734:read +1735:lane,lanes +1736:inside +1737:chain_mail +1738:camcorder +1739:bookstore +1740:squash +1741:watermelon,watermelons +1742:gourd +1743:footstool +1744:desolate +1745:houseboat +1746:lobby +1747:spacecraft +1748:reclining +1749:register +1750:arcade +1751:driftwood +1752:oil_lamp +1753:flash +1754:cupcake +1755:assortment +1756:disaster +1757:mousepad +1758:pencil_sharpener +1759:vineyard,vineyards +1760:trevi +1761:wood,woods +1762:stairway +1763:butterfly +1764:feather,feathers +1765:halter_top +1766:infant +1767:bobby_pin +1768:pleasant +1769:ups +1770:exercise,exercises +1771:roundabout +1772:talent +1773:neighborhood +1774:boulder,boulders +1775:herbs +1776:hall +1777:captivating +1778:cycling +1779:stainless +1780:ben +1781:Volkswagen,volkswagen +1782:attraction,attractions +1783:voltage +1784:edible_corn +1785:corn +1786:churches +1787:piano +1788:wolf +1789:railcar,railcar_,railcars +1790:stage,stages +1791:size,sizes +1792:wheat,wheats +1793:argentina +1794:satchel +1795:lit +1796:gift +1797:cocktail +1798:measuring_cup +1799:emergency +1800:panda +1801:soya_milk +1802:frog +1803:ribbon,ribbons +1804:rapids +1805:café +1806:goldfish +1807:fishbowl +1808:juice,juices +1809:umpire +1810:baseball_glove +1811:catcher +1812:rope,ropes +1813:occasion,occasions +1814:screw +1815:underside +1816:niche +1817:garland +1818:action +1819:tranquil +1820:whatsapp +1821:messaging +1822:jet_plane +1823:note,notes +1824:pavilion +1825:catamaran +1826:seawall +1827:bullet,bullets +1828:rise +1829:wind_chime +1830:scaffold +1831:cash +1832:dusk +1833:yak +1834:movie +1835:cutlery +1836:plier,pliers +1837:badge,badges +1838:hiking +1839:leotard +1840:fi +1841:backyard +1842:puddle +1843:horn +1844:monastery +1845:catapult +1846:pear +1847:site +1848:crochet +1849:countryside +1850:fire_engine +1851:fireman +1852:projection +1853:balconies +1854:mary +1855:reception +1856:dancing +1857:gazelle +1858:habitat +1859:school_bus +1860:warship +1861:congestion +1862:arch +1863:stingray +1864:terrain +1865:mixer,mixer_,mixers +1866:presence +1867:handkerchief +1868:cistern +1869:shaving_cream +1870:tube,tubes +1871:tusk +1872:debris +1873:whipped_cream +1874:jelly +1875:sparkler_ +1876:seating +1877:lamborghini +1878:brand +1879:crayon +1880:hermitage +1881:football_helmet +1882:frosting +1883:lighting +1884:corridor +1885:vinegar +1886:arches +1887:pantyhose +1888:zebra,zebras +1889:shower_head +1890:ornate +1891:polka +1892:wand,wands +1893:wristband 
+1894:chimney,chimneys +1895:accordion +1896:headstall_ +1897:fujifilm +1898:writing +1899:conservatory +1900:result +1901:goose +1902:beehive +1903:maid +1904:mailboxes +1905:railroad +1906:barbie +1907:thai +1908:sportback +1909:pick +1910:coatrack +1911:yellow +1912:hand,hands +1913:coil +1914:f1 +1915:candy_cane +1916:boxing_glove +1917:skiing +1918:twitter +1919:dice +1920:die +1921:silverware +1922:pickle +1923:tiger +1924:mud +1925:ceramic +1926:bookmark +1927:twine +1928:setup +1929:go +1930:headquarter,headquarters +1931:shot +1932:golfer,golfers +1933:tortoise +1934:graduation +1935:sweat +1936:monarch +1937:heritage +1938:water_scooter +1939:tender +1940:wash +1941:sundial +1942:bath_mat +1943:pitchfork +1944:desktop +1945:lip +1946:inhaler +1947:surreal +1948:fox +1949:work +1950:bust,busts +1951:control,controls +1952:heater +1953:farm +1954:asml +1955:rodeo +1956:flipper,flipper_,flippers +1957:yard +1958:water_cooler +1959:chap +1960:slav +1961:syringe,syringes +1962:earplug +1963:cork,cork_,corks +1964:grinder +1965:lizard +1966:minaret,minarets +1967:hangar +1968:University,university +1969:clay +1970:fleece +1971:coastline +1972:crew +1973:snowstorm +1974:avocado +1975:commuter +1976:year,years +1977:missile +1978:ritual +1979:hang +1980:malaysia +1981:wikipedia +1982:television_camera +1983:dog_collar +1984:claw +1985:priest,priests +1986:ingredient,ingredients +1987:clown +1988:rug +1989:dumbbell +1990:mahal +1991:flea +1992:handcuff +1993:Anchor,anchor +1994:ice_skate +1995:hockey_stick +1996:brake_light +1997:dinosaur,dinosaurs +1998:drag +1999:Smoke,smoke +2000:inspiring +2001:jug,jugs +2002:holly +2003:igniter +2004:shore,shores +2005:freight +2006:hibiscus +2007:hollywoodreporter.com +2008:expanse +2009:dalmatian +2010:foggy +2011:bullfighting +2012:ginger +2013:type,types +2014:ballroom +2015:maintenance +2016:fawn +2017:enclosure +2018:steel +2019:set +2020:puzzle +2021:energy +2022:operating +2023:medieval +2024:well +2025:rugby +2026:police_cruiser +2027:uk +2028:mammoth +2029:australia +2030:moody +2031:beeper +2032:pork +2033:dishtowel +2034:spring,springs +2035:corvette +2036:sheet,sheets +2037:designer +2038:start +2039:nutcracker +2040:submarine +2041:stormy +2042:ham +2043:salmon,salmon_,salmons +2044:borobudur +2045:flame,flames +2046:concentration +2047:shepherd +2048:military +2049:birdcage +2050:zucchini +2051:concrete +2052:barn +2053:transporter +2054:breechcloth +2055:gemstone,gemstones +2056:teepee,teepees +2057:catedral +2058:bulletproof +2059:parasail,parasail_,parasails +2060:end +2061:mercedes +2062:freight_car +2063:cage +2064:waterfall,waterfalls +2065:stretcher +2066:paramedic,paramedics +2067:lighter +2068:tug +2069:cream +2070:mobility +2071:watering_can +2072:dollhouse +2073:serve +2074:hit +2075:Electronics,electronic,electronics +2076:iron,iron_,irons +2077:break +2078:road_map +2079:wing,wings +2080:violinist +2081:music +2082:graveyard +2083:drinking +2084:liquid +2085:arctic_ +2086:kiosk,kiosks +2087:fortress +2088:pegboard +2089:handsaw +2090:chairlift +2091:individuals +2092:electricity +2093:poultry +2094:condition,conditions +2095:pony +2096:corral +2097:vespa +2098:caravan +2099:date,date_,dates +2100:asters +2101:advertisement,advertisements +2102:loafer,loafers +2103:country +2104:spatula +2105:rugged +2106:slab +2107:barbed +2108:stark +2109:post +2110:slaw +2111:moon +2112:internet +2113:petal,petals +2114:swimming +2115:colander +2116:paper_plate +2117:armoire +2118:cappuccino +2119:nut,nuts +2120:nail,nails +2121:valley 
+2122:cactus +2123:appetizer +2124:bridal +2125:samurai +2126:sweet_potato +2127:incense +2128:gold,golds +2129:exit +2130:houseplant +2131:limo +2132:fly +2133:makeshift +2134:flute +2135:creatures +2136:file,file_,files +2137:loaf +2138:Clothes,clothe,clothes +2139:peace +2140:spider +2141:child +2142:compass +2143:driving +2144:kitchenware +2145:ladies +2146:clothespin +2147:name,names +2148:vantage +2149:fur,furs +2150:claus +2151:purpose,purposes +2152:ax +2153:hand_glass +2154:freedom +2155:skater,skaters +2156:beijing +2157:amplifier +2158:pirate_flag +2159:flight,flights +2160:elevator_car +2161:pill +2162:london +2163:tambourine +2164:pacifier +2165:blood +2166:gaming +2167:situation +2168:beak +2169:hairbrush +2170:vacuum_cleaner +2171:tote +2172:provoking +2173:rocking_chair +2174:convertible,convertible_,convertibles +2175:sleigh +2176:foot +2177:pit +2178:keg +2179:batch +2180:penny,penny_,pennys +2181:old +2182:illuminate +2183:domestic_ass +2184:feature,features +2185:minibus +2186:sawhorse +2187:appetizing +2188:wrestler,wrestlers +2189:match +2190:cello +2191:brandenburg +2192:strawman +2193:banyan +2194:arab +2195:courthouse +2196:bathrobe +2197:lemonade +2198:lace +2199:illustration +2200:booth,booths +2201:coaster +2202:converse +2203:shape +2204:zoom +2205:mine +2206:hilltop +2207:denim +2208:pepper_mill +2209:patty_ +2210:contest +2211:cabin,cabins +2212:swarovski +2213:sing +2214:pajama,pajamas +2215:taj +2216:vine,vines +2217:enchanting +2218:dune,dunes +2219:blender +2220:security +2221:hairpin +2222:hoodie +2223:phonograph_record +2224:vanity +2225:tangerine +2226:citrus +2227:orange_juice +2228:jungle +2229:grave,graves +2230:garlic +2231:beanbag +2232:skill,skills +2233:seoul +2234:pitch +2235:paintball +2236:eraser +2237:nosebag_ +2238:mechanic +2239:accident +2240:splatters +2241:blend +2242:steam +2243:charging +2244:ticket,tickets +2245:journey +2246:nerf +2247:beam,beams +2248:dense +2249:benches +2250:casino,casinos +2251:white +2252:nativity +2253:lifeguard +2254:hood,hoods +2255:fedora,fedoras +2256:wooden_spoon +2257:tableware +2258:mcdonald,mcdonalds +2259:policeman +2260:club +2261:center +2262:knitting_needle +2263:synagogue +2264:pantheon +2265:skeleton +2266:lawn_mower +2267:cuban +2268:kangaroo +2269:drawing +2270:salesforce +2271:cassette +2272:significance +2273:sunrise +2274:hanger +2275:phonebook +2276:muscle +2277:angle +2278:selection +2279:unicycle +2280:lifestyle +2281:robertusburg +2282:fixture +2283:locomotive +2284:landmark,landmarks +2285:tissue_paper +2286:armed +2287:decorate +2288:residents +2289:forum +2290:cafe,cafes +2291:nutella +2292:comic_book +2293:DVD,dvd +2294:pocketknife +2295:rainbow +2296:butter +2297:part,parts +2298:sony +2299:crumb +2300:new,news +2301:surf +2302:shaker +2303:canyon +2304:3i +2305:flagship +2306:yarn +2307:extinguisher +2308:parakeet +2309:glimpse +2310:attach +2311:sunlight +2312:compete +2313:moat +2314:ice_pack +2315:fintech +2316:rack,racks +2317:sell +2318:daring +2319:bonsai +2320:rhinoceros +2321:case,cases +2322:livestock +2323:chime +2324:marching +2325:typewriter +2326:olympic +2327:stir +2328:coastal +2329:musical_instrument +2330:season,seasons +2331:verdi +2332:expo +2333:branches +2334:butcher +2335:heel,heels +2336:clothesline +2337:kiwi +2338:mango +2339:pearl +2340:potatoes +2341:defender +2342:press +2343:santa +2344:ornament,ornaments +2345:shower_curtain +2346:towel_rack +2347:bulldog +2348:transportation +2349:rider,riders +2350:lamartine +2351:dirt_bike +2352:state +2353:pancake 
+2354:dam +2355:windsock +2356:spot +2357:Band_Aid +2358:waiter +2359:long +2360:banquet +2361:violin +2362:heron +2363:ambiance +2364:carving,carvings +2365:facility +2366:athlete,athletes +2367:birthday +2368:suspension +2369:clarinet +2370:kit +2371:recorder +2372:grove +2373:beachball +2374:trophies +2375:underclothes +2376:restroom +2377:wetland +2378:need +2379:wrapper,wrappers +2380:antengroup +2381:clasp +2382:advertising +2383:amphitheater +2384:airman +2385:dial,dials +2386:snake +2387:pool_table +2388:flowering +2389:caution +2390:warm +2391:cabinetry +2392:session +2393:Theatre,theatre,theatres +2394:run +2395:eyepatch +2396:bersih +2397:santander +2398:microwave_oven +2399:skylight +2400:bengal +2401:winding +2402:grandeur +2403:venue +2404:flamingo,flamingos +2405:stork +2406:sephora +2407:pleasing +2408:pretzel +2409:cement +2410:class +2411:playhouse +2412:underdrawers +2413:funnel +2414:leafy +2415:cookout +2416:rodent +2417:bandeau +2418:hummus +2419:orchid,orchids +2420:rig +2421:oil,oils +2422:components +2423:battle +2424:valve +2425:tomb,tombs +2426:crocodile,crocodiles +2427:asphalt +2428:full +2429:sparrow +2430:list +2431:frozen +2432:essence +2433:fume_hood +2434:cooking_utensil +2435:kitchen_sink +2436:spice_rack +2437:cider +2438:love +2439:shelve,shelves +2440:technology +2441:starfish +2442:kitchen_table +2443:dartboard +2444:foam +2445:rosary +2446:airways +2447:checkerboard +2448:countertop +2449:emblem +2450:poker,poker_,pokers +2451:blacksmith +2452:armadillo +2453:tub +2454:bougainvillea +2455:innocence +2456:stuff +2457:lasagna +2458:edge +2459:karting +2460:chloe +2461:judo +2462:veterans +2463:parka +2464:pet +2465:fil +2466:turkey,turkey_,turkeys +2467:hurdle +2468:oyster,oysters +2469:clam +2470:table-tennis_table +2471:tape_measure +2472:goalkeeper +2473:stationery +2474:clip +2475:lipstick +2476:tissue +2477:centerpiece +2478:figure,figures +2479:cadbury +2480:juventus +2481:hotplate +2482:whistle +2483:property +2484:maple +2485:cupboard +2486:parthenon +2487:brochure,brochures +2488:ketchup +2489:laundry +2490:birch +2491:terrarium +2492:crock_pot +2493:paperback +2494:shears +2495:bounds +2496:green_bean +2497:fresh +2498:bunny +2499:sponge +2500:disco +2501:check - in +2502:alsace +2503:modern +2504:shellfish +2505:narrow +2506:studio,studios +2507:rental +2508:chihuahua +2509:stapler,stapler_,staplers +2510:measure,measures +2511:wardrobe +2512:caboose +2513:dandelion +2514:paella +2515:griddle +2516:panorama +2517:department +2518:candy_bar +2519:seaside +2520:firefighter,firefighters +2521:teakettle +2522:cash_register +2523:snail,snails +2524:conveyor +2525:ponytail +2526:hogwart,hogwarts +2527:liverpool +2528:louvre +2529:document,documents +2530:message +2531:pose,poses +2532:marsh +2533:popolo +2534:choice +2535:stairwell +2536:shower_cap +2537:pollen +2538:sweatband +2539:sunglass +2540:jellyfish +2541:dresses +2542:crib +2543:iceberg,icebergs +2544:approach +2545:dairy +2546:peanut_butter +2547:coal +2548:dough +2549:disneyland +2550:roll,rolls +2551:notice +2552:bmw +2553:innut +2554:starting +2555:kbs +2556:athens +2557:racing +2558:peacock +2559:forge +2560:Toast,toast,toast_,toasts +2561:mp3 +2562:linen +2563:duvet +2564:friend,friends +2565:manga +2566:s10 +2567:playroom +2568:tinfoil +2569:brewery +2570:supplies +2571:amc +2572:daisy +2573:meadow +2574:tannery +2575:meatball +2576:deadbolt +2577:dhl +2578:utensil +2579:folder +2580:hbf +2581:music_stool +2582:diver,divers +2583:barbell +2584:headdresses +2585:service,services 
+2586:drying +2587:pomeranian +2588:safety_pin +2589:roofed +2590:manger +2591:nutrition +2592:magnifying +2593:sandstone +2594:bushes +2595:outfit,outfits +2596:hair_drier +2597:smoothie +2598:pattern,patterns +2599:shell,shells +2600:way +2601:reamer_ +2602:controller,controllers +2603:pirate +2604:whiteboard +2605:lecture +2606:torii +2607:teammate,teammates +2608:comb +2609:cool +2610:bundle,bundles +2611:cashew +2612:harvesting +2613:maxi +2614:moonlit +2615:sherbert +2616:bob +2617:Antiques,antique,antiques +2618:check +2619:foliage +2620:point +2621:feet +2622:circular +2623:hanging +2624:honey +2625:duffel +2626:macy +2627:gear,gears +2628:plow,plow_,plows +2629:home_plate_ +2630:batter,batter_,batters +2631:wallpaper +2632:latticework +2633:victory +2634:kiwi_fruit +2635:scoop +2636:meeting +2637:freshener +2638:epson +2639:ecommerce +2640:unicorn +2641:relaxation +2642:weisshaar +2643:present,presents +2644:character,characters +2645:amg +2646:commercial +2647:google +2648:visitors +2649:elevator +2650:thermos +2651:rag_doll +2652:soil +2653:pump +2654:antelope +2655:nun,nuns +2656:dahlia +2657:toyshop +2658:boiled_egg +2659:shallow +2660:spray +2661:classic +2662:nurse +2663:rainforest +2664:hornet +2665:gallery +2666:competition +2667:donation +2668:vegetation +2669:health +2670:aftermath +2671:intensity +2672:chip,chips +2673:nightlife +2674:lens +2675:relaxing +2676:emmy +2677:carnation +2678:mower +2679:sheepdog +2680:chessboard +2681:vodka +2682:baboon +2683:paradise +2684:geisha +2685:ant,ants +2686:behavior +2687:ghost +2688:procedure,procedures +2689:husky +2690:cleanliness +2691:glacier +2692:casket +2693:website +2694:thinkpad +2695:reenactment +2696:republic +2697:water_gun +2698:flexibility +2699:water_heater +2700:quiche +2701:exciting +2702:boom_microphone +2703:turnstile +2704:recycling +2705:kennel +2706:earth +2707:development +2708:crowbar +2709:beekeeper,beekeepers +2710:niagara +2711:polar_bear +2712:manatee +2713:hippopotamus +2714:petronas +2715:wintery +2716:cause +2717:delightful +2718:paint +2719:draw +2720:chaise_longue +2721:blossom,blossoms +2722:interview +2723:tortilla +2724:dutch +2725:artifact,artifacts +2726:kennedy +2727:checkbook +2728:tomatoes +2729:korean +2730:champagne +2731:porthole +2732:capsule +2733:system +2734:marches +2735:cornbread +2736:grey +2737:refugee +2738:downtown +2739:rear +2740:enthusiasts +2741:student,students +2742:hairstyle +2743:barbecue +2744:guard,guards +2745:purchase +2746:turret,turrets +2747:a cathedral +2748:lifeboat,lifeboats +2749:possum +2750:shaver,shaver_,shavers +2751:potter +2752:snowplow +2753:stereo,stereo_,stereos +2754:freeway +2755:process +2756:hot_sauce +2757:rainstorm +2758:holi +2759:porsche,porsches +2760:eel +2761:anvil +2762:crape +2763:fridge +2764:aromatherapy +2765:amphitheatre +2766:dimly +2767:four +2768:potholder +2769:microwave oven +2770:cheerleading +2771:coast +2772:smartwatch +2773:retaining +2774:courgette +2775:transfusion +2776:appearance +2777:alarm +2778:trick +2779:jukebox +2780:willow +2781:antler,antlers +2782:townhouses +2783:huawei +2784:jump +2785:barren +2786:bra +2787:medal,medals +2788:tombstone,tombstones +2789:dirty +2790:cartoon +2791:rein,reins +2792:industry +2793:lives +2794:braid +2795:amusing +2796:watering +2797:sauce,sauces +2798:flip +2799:circus +2800:driver,drivers +2801:rooster +2802:viewpoint +2803:fern,ferns +2804:scarecrow +2805:persimmon +2806:media +2807:Crabs,crab,crab_,crabs +2808:hydrangea,hydrangeas +2809:crowwell +2810:tsmc +2811:condom 
+2812:carabine +2813:durian +2814:coffeepot +2815:console +2816:shower +2817:tofu +2818:leaf +2819:trim +2820:americas +2821:participant,participants +2822:massage +2823:cell +2824:apostles +2825:seal +2826:catfish +2827:golf_club +2828:razorblade +2829:brush +2830:lamb,lamb_,lambs +2831:speedway +2832:climate +2833:clinic +2834:cib +2835:cascades +2836:skating +2837:grassland +2838:throne +2839:rim +2840:octopus +2841:jaar +2842:leisure +2843:stalks +2844:dry +2845:chainsaw +2846:review +2847:tradução +2848:sagrada +2849:bean,beans +2850:anniversary +2851:inviting +2852:intimate +2853:mtv +2854:lodge +2855:importance +2856:artichoke +2857:CD_player +2858:crash +2859:alpaca +2860:eden +2861:tuk,tuks +2862:photographer,photographers +2863:armada +2864:trellis +2865:sailing +2866:pine +2867:engaging +2868:walmart +2869:pull +2870:attic +2871:combination_lock +2872:storage +2873:bunk +2874:beetle +2875:bureau +2876:dormitory +2877:symbol,symbols +2878:demon +2879:cleaning +2880:reichsbahn +2881:grasshopper +2882:perform +2883:fry +2884:hollywood +2885:secure +2886:baggage +2887:conversation +2888:delays +2889:mission +2890:hydrant +2891:flyer +2892:facebook +2893:Starbucks,starbucks +2894:basilica +2895:challenging +2896:steering +2897:backhoe +2898:connection +2899:deichmann +2900:double +2901:excitement +2902:salsa +2903:compost +2904:abundance +2905:black_sheep +2906:reminder +2907:crouton +2908:nose +2909:radish +2910:ipad,ipads +2911:welcoming +2912:kitten,kittens +2913:needle +2914:seahorse +2915:leash +2916:clearing +2917:creation,creations +2918:mini +2919:shot_glass +2920:infirm +2921:exposure +2922:misty +2923:cluster +2924:cheer +2925:airy +2926:mickey +2927:first-aid_kit +2928:medication +2929:shift +2930:patient +2931:operate +2932:routine,routines +2933:afro +2934:infinity +2935:scrap,scraps +2936:slime +2937:powerpoint +2938:contrasting +2939:vagina +2940:cayenne,cayenne_,cayennes +2941:network +2942:usd +2943:fume +2944:passageway +2945:bacon +2946:sugar_bowl +2947:croatia +2948:comforter +2949:gymnastics +2950:trench_coat +2951:underwater +2952:gymnast +2953:base +2954:integration +2955:chipps +2956:powder +2957:fashion +2958:playpen +2959:merry +2960:kampot +2961:barber +2962:carrefour +2963:treadmill +2964:determination +2965:hand_towel +2966:Raindrops,raindrop,raindrops +2967:chest +2968:basin +2969:seesaw +2970:egg_yolk +2971:arbor +2972:residence +2973:ford +2974:saucepan +2975:totem +2976:abacus +2977:cord +2978:guys +2979:device,devices +2980:playingfield +2981:nightshirt +2982:bib +2983:toaster_oven +2984:syrup +2985:cowbell +2986:orangutan,orangutans +2987:viewing +2988:olive +2989:grid +2990:contents +2991:hedgehog +2992:levi +2993:sight,sights +2994:tosoh +2995:rialto +2996:wakeboard +2997:facilities +2998:gravy +2999:manhattan +3000:milkshake +3001:jelly_bean +3002:jelly bean +3003:owner +3004:seafloor +3005:airline,airlines +3006:recording +3007:darth +3008:quarry +3009:cosplay +3010:biology +3011:machete +3012:stuffed +3013:pothole +3014:hair_dryer +3015:sharm +3016:world +3017:price,prices +3018:war +3019:alien +3020:rusty +3021:can_opener +3022:holder +3023:tarot +3024:grate +3025:dust +3026:tournament +3027:igloo +3028:union +3029:woa +3030:functioning +3031:bartender +3032:untidy +3033:popsicle +3034:damage +3035:slum +3036:artillery +3037:manchester +3038:half +3039:reel +3040:cricketer +3041:Brownies,brownie,brownies +3042:grater +3043:shard +3044:zara +3045:leeks +3046:practice,practices +3047:gullfoss +3048:leons +3049:fall +3050:stew +3051:grapefruit 
+3052:amazon +3053:amount +3054:ad +3055:pre +3056:gaza +3057:removal +3058:pomegranate +3059:small +3060:bath_towel +3061:hole,holes +3062:cheese +3063:calligraphy +3064:duty +3065:balance,balances +3066:ferrari +3067:enterprise +3068:clifton +3069:cereal +3070:twin +3071:packet,packets +3072:blowfish +3073:blind +3074:shadow,shadows +3075:society +3076:bread-bin +3077:emergencies +3078:xylophone +3079:plan +3080:roast +3081:collar +3082:milka +3083:disposal +3084:coffin +3085:vat +3086:string,strings +3087:availability +3088:tiananmen +3089:pilot,pilots +3090:collage +3091:storm +3092:nighttime +3093:dishwasher +3094:snowshoe +3095:one,ones +3096:costa +3097:rover +3098:cypress +3099:footstall +3100:pride +3101:egret +3102:milky +3103:ballerina +3104:aktiengesellschaft +3105:drain +3106:scrubbing_brush +3107:degree +3108:shredder,shredder_,shredders +3109:ephesus +3110:handful +3111:legume +3112:stethoscope +3113:vastness +3114:CD,cd +3115:disc +3116:reno +3117:mill +3118:joystick +3119:chevrolet +3120:piggy +3121:edmonton +3122:vigil +3123:burrito +3124:bitcoin +3125:brownstone +3126:macaque +3127:birthday_cake +3128:complex +3129:corkscrew +3130:entertainment +3131:chipmunk +3132:poppy +3133:confetti +3134:witch +3135:tailor +3136:temperature +3137:coach +3138:swivel +3139:nailfile +3140:tori +3141:canteen +3142:direction,directions +3143:banjo +3144:bud +3145:china +3146:snowmen +3147:efforts +3148:toronto +3149:babies +3150:kebab +3151:car_battery +3152:balancer +3153:pasture +3154:invitation +3155:cockpit +3156:martini +3157:brooklyn +3158:telus +3159:Waffles,waffle,waffles +3160:laser +3161:tabby +3162:reality +3163:celebrate +3164:germany +3165:raven +3166:predators +3167:videotape +3168:gothic +3169:preparation +3170:mangrove +3171:plain +3172:actor +3173:scouting +3174:charger +3175:venice +3176:ditch +3177:jal +3178:hen +3179:pianist +3180:subaru +3181:t-72 +3182:carr +3183:cheerleader,cheerleaders +3184:charm +3185:torch +3186:peak,peaks +3187:matchbox +3188:cultivation +3189:squid,squid_,squids +3190:succulent,succulents +3191:quadcopter +3192:dentist +3193:impression +3194:caliper +3195:duckling +3196:chickpea +3197:athletic +3198:tim +3199:vacuum +3200:disney +3201:assembly +3202:mess +3203:pheasant +3204:karlovy +3205:handlebars +3206:refreshing +3207:lobster,lobsters +3208:tattoo +3209:boar +3210:exchange +3211:stock +3212:safari +3213:sharpie +3214:birthday_card +3215:Tabasco_sauce +3216:peterbilt +3217:search +3218:tel +3219:justice +3220:atomizer +3221:lily +3222:mulch +3223:rake +3224:hyundai +3225:bloomberg +3226:dubai +3227:burj +3228:bean_curd +3229:denmark +3230:mussel +3231:playtime +3232:hunting +3233:knit +3234:a net +3235:antarctica +3236:passersby +3237:tulip,tulips +3238:murder +3239:warmth +3240:glaze +3241:mashed_potato +3242:sour_cream +3243:subwoofer +3244:moss +3245:centipede +3246:cute +3247:chalice +3248:observatory +3249:tanker +3250:cigar,cigars +3251:saxophonist +3252:sax +3253:crest +3254:harbour +3255:sashimi +3256:thrift +3257:calf +3258:masai +3259:clogs +3260:cafeteria +3261:raceway +3262:omelet +3263:sneaker,sneakers +3264:shelving +3265:arrival,arrivals +3266:hiker,hikers +3267:opel +3268:j +3269:woodpecker +3270:michael +3271:refinery +3272:privata +3273:chalk,chalks +3274:cherokee +3275:clementine +3276:doctor,doctors +3277:bridle +3278:toiletry +3279:lock,locks +3280:convoy +3281:brake +3282:scuba +3283:historic +3284:superhero +3285:metro +3286:cinema +3287:investors +3288:take +3289:domino +3290:junk +3291:multitude +3292:moor 
+3293:chrysanthemum,chrysanthemums +3294:daisies +3295:boutique +3296:servicescape +3297:extent +3298:rat +3299:formula +3300:bulb,bulbs +3301:prison +3302:harley +3303:arm,arms +3304:hug +3305:outage +3306:government +3307:tide +3308:demolition +3309:puppy +3310:outlet,outlets +3311:juggle +3312:mode +3313:hatbox +3314:crop,crops +3315:blaster +3316:teacher +3317:johnny +3318:fungi +3319:form +3320:shotgun +3321:prada +3322:selfridges +3323:bubble_gum +3324:passage +3325:columbia +3326:fight +3327:claim +3328:bodybuilding +3329:radion +3330:perch +3331:birdfeeder +3332:bumblebee +3333:eggbeater +3334:letter,letters +3335:rendering +3336:rhododendron +3337:deliver +3338:susans +3339:axe,axes +3340:comfort +3341:king +3342:cooper +3343:article,articles +3344:diverse +3345:rv +3346:timeless +3347:resilience +3348:bosphorus +3349:dumpling,dumplings +3350:seaweed +3351:cristiano +3352:combination +3353:crescent_roll +3354:h +3355:cleanup +3356:landscaping +3357:boxes +3358:circa +3359:aqueduct +3360:tropical +3361:blooming +3362:picket +3363:garment,garments +3364:personnel +3365:winery +3366:velvet +3367:skytopolis +3368:centre +3369:dump +3370:gardening +3371:prayer +3372:silhouette,silhouettes +3373:ecosport +3374:fitness +3375:dealers +3376:hookah +3377:businesses +3378:workstation +3379:adventure +3380:preacher +3381:riot +3382:messenger +3383:maritime +3384:egg_roll +3385:bridesmaid,bridesmaids +3386:inscription +3387:fig +3388:fiverr +3389:layer +3390:toll +3391:shuttle +3392:meerkat +3393:boosters +3394:vial,vials +3395:khaki +3396:budgeting +3397:water_faucet +3398:prowess +3399:pug-dog +3400:knex +3401:branding +3402:scythe +3403:build +3404:treasure +3405:grain +3406:illumio +3407:effect,effects +3408:fossil +3409:ping-pong_ball +3410:sander +3411:appointment,appointments +3412:vatican +3413:sill +3414:signpost +3415:file_cabinet +3416:tribute +3417:euro,euros +3418:fedex +3419:space_shuttle +3420:tuxedo +3421:falcon +3422:texture +3423:dragonfly +3424:leopard +3425:walker +3426:memorabilia +3427:entertainer +3428:embrace +3429:badminton +3430:printing +3431:blade +3432:pea,pea_,peas +3433:quaint +3434:sprinkle +3435:towering +3436:positions +3437:bloom,blooms +3438:welding +3439:mint_candy +3440:calendula +3441:countries +3442:rainfall +3443:others +3444:glider +3445:aerosol +3446:makeup +3447:clipper,clippers,clippers_ +3448:sip +3449:czech +3450:lay +3451:leg,legs +3452:prix +3453:virgin +3454:airpods +3455:couches +3456:effectiveness +3457:kitty +3458:crow +3459:gourmet +3460:skype +3461:context +3462:bookshop +3463:clutch_bag +3464:souvenir,souvenirs +3465:iguana +3466:kick +3467:prague +3468:urinal +3469:crime +3470:blossoming +3471:oakley +3472:waste +3473:potiers +3474:spiral +3475:piazza +3476:scallop +3477:macaw +3478:inter +3479:olive_oil +3480:yurt +3481:stable +3482:contractors +3483:baseball_base +3484:families +3485:broadcasting +3486:transition +3487:stanford +3488:curling_iron +3489:traveler,travelers +3490:popcorn +3491:candies +3492:nvidia +3493:test +3494:kneels +3495:address +3496:india +3497:community +3498:kfc +3499:hospice +3500:pendulum +3501:vacation +3502:comminity +3503:diving_board +3504:industrial +3505:metrocard +3506:benz +3507:whiskey +3508:provider +3509:unit +3510:neckband +3511:lindbergh +3512:panerai +3513:gnome +3514:champion,champions +3515:dream,dreams +3516:stretch +3517:flakes +3518:shoulder,shoulders +3519:payment +3520:battleship +3521:bet +3522:burst +3523:cutout +3524:appreciation +3525:coinex +3526:sleep +3527:reading +3528:investment 
+3529:kart,karts +3530:walgreens +3531:earthquake +3532:quran +3533:balenciaga +3534:banking +3535:fagioli +3536:shipwreck +3537:genex +3538:Shakespeare +3539:pilgrim +3540:online +3541:ripe +3542:boxer,boxers +3543:vaccination +3544:vodafone +3545:rangoli +3546:coventry +3547:dell +3548:mustard +3549:potential +3550:chocolate_cake +3551:gateway +3552:doha +3553:visa +3554:use,uses +3555:koi +3556:st +3557:remnant,remnants +3558:encounter +3559:food_processor +3560:botanist +3561:scores +3562:aston +3563:vivid +3564:lug +3565:capitol +3566:burnt +3567:bentley +3568:ponce +3569:keng +3570:guillotine +3571:odeon +3572:dusty +3573:poland +3574:rubble +3575:lockdown +3576:seminar +3577:€ 1bn +3578:democracy +3579:homeless +3580:sling,sling_,slings +3581:photography +3582:stocking +3583:cove +3584:creme +3585:salvation +3586:renovation,renovations +3587:porcelain +3588:retailer +3589:geography +3590:gameboard +3591:block,blocks +3592:autumnal +3593:atmos +3594:mint,mints +3595:hobbit +3596:injection +3597:rohingya +3598:telephoto_lens +3599:ideology +3600:jetblue +3601:rod +3602:a mural +3603:soup_bowl +3604:libra +3605:pack,packs +3606:mandarin_orange +3607:jog +3608:chevy +3609:wadi +3610:alcove +3611:limousine +3612:teeth +3613:lead +3614:timberland +3615:premiere +3616:mandala +3617:centrifuge +3618:stylist +3619:durability +3620:cheetah +3621:engineering +3622:lisbon +3623:move,moves +3624:highchair +3625:browser +3626:sweet,sweets +3627:kansas +3628:corkboard +3629:rex +3630:humanatura +3631:siem +3632:sharpener +3633:wedding_cake +3634:toca +3635:juicy +3636:hoka +3637:conflict +3638:ferret +3639:billiard +3640:proximity +3641:charles +3642:beaker +3643:burning +3644:microsoft +3645:swab +3646:mi +3647:funeral +3648:victims +3649:montblanc +3650:soupspoon +3651:bites +3652:census +3653:orchestra +3654:logistics +3655:magnolia +3656:bullring +3657:pigtail +3658:impact +3659:dangote +3660:tranquility +3661:combine +3662:repair +3663:cream_pitcher +3664:prop +3665:foil +3666:steamy +3667:fluffy +3668:procentown +3669:bill,bills +3670:zombie +3671:stonework +3672:brooch +3673:abesses +3674:playing +3675:twilight +3676:snorkel +3677:escargot +3678:chamber +3679:denominations +3680:starry +3681:synergy +3682:abbercrombie +3683:dji +3684:daughter,daughters +3685:artist +3686:bafta +3687:noodle,noodles +3688:fishmongers +3689:hourglass +3690:rolling_pin +3691:mortar +3692:plaster +3693:heineken +3694:patent +3695:bw +3696:achievement,achievements +3697:renault +3698:chocolate_milk +3699:charcoal +3700:huddle +3701:digger +3702:multiple +3703:potala +3704:captain +3705:range +3706:brazil +3707:guidebook +3708:b +3709:leafless +3710:hamster +3711:thimble +3712:livery +3713:tackle +3714:mitsubishi +3715:experiment +3716:wrap +3717:tallit +3718:fog +3719:footprints +3720:hippo +3721:schoolgirls +3722:jail +3723:surveying +3724:delivery +3725:pasta +3726:bunk_bed +3727:complexes +3728:relief,reliefs +3729:homework +3730:record +3731:ecuador +3732:demonstrator +3733:$ 1bn +3734:longtail +3735:afternoon +3736:turn,turns +3737:chase +3738:pylons +3739:puffer +3740:motorsports +3741:haircut +3742:tooth +3743:kingsbau +3744:flops +3745:roadside +3746:thru +3747:merchandise +3748:sewer +3749:dot,dots +3750:manner,manners +3751:surrey +3752:chisel +3753:beachgoers +3754:wat +3755:change +3756:casserole +3757:vuitton +3758:scheme +3759:super +3760:austria +3761:chanel +3762:supercar +3763:zuora +3764:tasks +3765:introduction +3766:dental_floss +3767:depot +3768:opponents +3769:piggy_bank +3770:celebratory 
+3771:flare,flares +3772:brickwork +3773:nectarine +3774:hula +3775:comforting +3776:dagger +3777:florida +3778:otter +3779:wilderness +3780:web +3781:riverbank +3782:volunteer +3783:york +3784:substation +3785:victoria +3786:flask +3787:smokestack,smokestacks +3788:honor +3789:push +3790:cosmonaut +3791:copper +3792:brewing +3793:menorah +3794:adidas +3795:stain +3796:fit +3797:salt +3798:audi +3799:sensors +3800:aspects +3801:navigation +3802:cooker +3803:cold +3804:big +3805:operation +3806:moose +3807:depths +3808:gator +3809:hoe +3810:reichstag +3811:infrastructure +3812:abstract +3813:awareness +3814:barcelona +3815:bvlgari +3816:maternity +3817:mac +3818:treasury +3819:supply +3820:public +3821:grade +3822:mallard +3823:cbb +3824:chalet +3825:maze +3826:hoop,hoops +3827:azure +3828:establishment,establishments +3829:dickens +3830:scroll +3831:summer +3832:delicacies +3833:participation +3834:creepy +3835:volcano +3836:allianz +3837:ban +3838:minnie +3839:cisco +3840:competitor +3841:philips +3842:aloe +3843:crosses +3844:mexico +3845:loincloth +3846:handstand +3847:strawberries +3848:congregation +3849:beachside +3850:lemur +3851:rhine +3852:vacant +3853:saint,saints +3854:gt2 +3855:signing +3856:chemical,chemicals +3857:dinner +3858:exploration +3859:trafigura +3860:treehouse +3861:slippery +3862:satay +3863:spread +3864:weed +3865:brass +3866:confidence +3867:paddy +3868:july +3869:insulation +3870:anemone +3871:cheesecake +3872:hind +3873:blueberries +3874:queen +3875:lifting +3876:level,levels +3877:height +3878:pike +3879:lilies +3880:hustle +3881:call +3882:firefighting +3883:outlook +3884:baker +3885:bimex +3886:samoyed +3887:reflect +3888:phonograph +3889:checkpoint +3890:tiktok +3891:eye,eyes +3892:response +3893:poinsettia +3894:broadway +3895:wildebeest +3896:coca +3897:cep +3898:punch +3899:backup +3900:liberty +3901:walkie +3902:caramel +3903:berkshka +3904:clover +3905:reptile +3906:triumph +3907:fragrances +3908:slot +3909:amad +3910:marvel,marvels +3911:fancy +3912:airship +3913:sound +3914:hide,hides +3915:futon +3916:sailor +3917:montreal +3918:murraya +3919:cellar +3920:deep +3921:russia +3922:tinder +3923:playstation +3924:vienna +3925:bta +3926:mail +3927:triathlon +3928:pakistani +3929:gelatin +3930:citywalk +3931:diner +3932:tax +3933:bowler +3934:finger,fingers +3935:graph +3936:investigation +3937:jetliner +3938:wildlife +3939:dedication +3940:riverbed +3941:striking +3942:nap +3943:dv +3944:installation +3945:basement +3946:phoenix +3947:bunker +3948:hatta +3949:channel +3950:waltz +3951:fieglhiller +3952:genesis +3953:son +3954:hong +3955:breast +3956:isolation +3957:dalhousie +3958:pudding +3959:revenue +3960:belgrado +3961:peeler_ +3962:alcazar +3963:inflation +3964:september +3965:applesauce +3966:rolex +3967:haystack +3968:foreground +3969:gecko +3970:shake +3971:economy +3972:graham +3973:hair_curler +3974:teenagers +3975:milford +3976:pipe_bowl +3977:daylight +3978:petunia,petunias +3979:indonesia +3980:target,targets +3981:labor +3982:yolk +3983:udder +3984:florence +3985:graze +3986:collibra +3987:cigna +3988:observation +3989:signal +3990:bolivia +3991:pennies +3992:simple +3993:nissan +3994:xnxx +3995:vulture +3996:perches +3997:hunter +3998:quartz +3999:record_player +4000:suburb +4001:pavillon +4002:avia +4003:jay +4004:oasis +4005:manufacturing +4006:fitbit +4007:roadster +4008:measuring +4009:hazy +4010:networking +4011:pizzeria +4012:cameo +4013:ironwork +4014:deutsche +4015:embroidery +4016:drug,drugs +4017:snowblower +4018:creek 
+4019:surfing +4020:regal +4021:mandarin +4022:nation,nations +4023:marketing +4024:careless +4025:fundraiser +4026:coleslaw +4027:kids +4028:update +4029:pokemon +4030:firewood +4031:humboldt +4032:quality +4033:jaguar +4034:styrofoam +4035:publication +4036:discovery +4037:myer +4038:transformer,transformers +4039:toyota +4040:machinery +4041:domed +4042:outback +4043:motel +4044:wool +4045:blank +4046:manicure +4047:stanley +4048:gray +4049:clinique +4050:roadway +4051:juicer +4052:issue,issues +4053:blur +4054:fidelity +4055:nostalgic +4056:cola +4057:lenovo +4058:crawfish +4059:caterpillar +4060:qatar +4061:migrants +4062:relaxed +4063:vanilla +4064:amalfi +4065:route +4066:goalpost +4067:thistle +4068:aid +4069:novels +4070:running +4071:some +4072:dior +4073:morocco +4074:skin +4075:trip +4076:culture +4077:soy +4078:bma +4079:birth +4080:sunnuck +4081:shopify +4082:honda +4083:yahoo +4084:tartlets +4085:startup +4086:mixture +4087:tense +4088:furry +4089:peony +4090:pod +4091:border +4092:schloss +4093:buckingham +4094:harvest +4095:pepco +4096:tabletop +4097:icy +4098:transport +4099:siemens +4100:research +4101:millennium +4102:wrestle +4103:bumper +4104:researcher +4105:husband +4106:zurichchina +4107:flooring +4108:dj +4109:moscow +4110:growth +4111:seed,seeds +4112:lottery +4113:depicting +4114:barnes +4115:singers +4116:alibaba +4117:grammy,grammys +4118:thick +4119:cut,cuts +4120:sams +4121:heuer +4122:Wii +4123:july,2017 +4124:workout +4125:gavel +4126:floss +4127:varieties +4128:shipment +4129:bustle +4130:daytime +4131:energizer +4132:budhi +4133:valentino +4134:unity +4135:skincare +4136:pantry +4137:aurora +4138:prepaid +4139:hilton +4140:blash +4141:centrify +4142:karen +4143:cyber +4144:gucci +4145:neat +4146:smile,smiles +4147:fillmore +4148:filmores +4149:insurance +4150:acropolis +4151:precaution +4152:fuchsia +4153:access +4154:bakin +4155:print +4156:roach +4157:bt5 +4158:examination +4159:surgery +4160:relax +4161:helipad +4162:source +4163:grasses +4164:diary +4165:rum +4166:tradingview +4167:outing +4168:respect +4169:cradle +4170:nordstrom +4171:disk +4172:sewing +4173:tesla +4174:cheerful +4175:thailand +4176:gerbera +4177:caxa +4178:milhões +4179:supercenter +4180:politicians +4181:chicago +4182:capital +4183:cities +4184:venetian +4185:gundam +4186:moving +4187:pebble +4188:patrons +4189:tyne +4190:cinemax +4191:sugar +4192:crater +4193:breakfast +4194:incubator +4195:nozzle +4196:cocoa,cocoa_,cocoas +4197:rights +4198:farmland +4199:mumbai +4200:walking +4201:trinkets +4202:pure +4203:suites +4204:pawn +4205:balvenie +4206:nathan +4207:salad_plate +4208:algarve +4209:millions +4210:hardback_book +4211:century +4212:revera +4213:snapchat +4214:contain +4215:bangkok +4216:artistry +4217:gin +4218:humvee +4219:amtek +4220:embassy +4221:galeria +4222:tap +4223:vaccine +4224:vancouver +4225:pepsi +4226:support +4227:convenient +4228:spans +4229:miss +4230:weeknd +4231:rest +4232:lloyd +4233:taco +4234:nes +4235:origami +4236:oscars +4237:learn +4238:triomphe +4239:belgium +4240:horseman +4241:coupang +4242:quote +4243:countless +4244:retro +4245:boarding +4246:slate +4247:story +4248:imports +4249:pharmacies +4250:grazing +4251:fir +4252:skittles +4253:thing,things +4254:shoelace +4255:inkpad +4256:rhinoceroses +4257:solutions +4258:zte +4259:snowdrops +4260:france +4261:Scotiabank +4262:charter +4263:magpie +4264:oak +4265:eatery +4266:wearing +4267:ticketing +4268:tiber +4269:prom +4270:voter +4271:editor +4272:incident +4273:bait +4274:contributions 
+4275:means +4276:evergreen +4277:boiler,boilers +4278:brussels +4279:satellite +4280:bullfighter +4281:pokémon +4282:stripe +4283:lidl +4284:midpoint +4285:canon +4286:instagram +4287:twig +4288:relocation +4289:murky +4290:raccoon +4291:curvy +4292:marijuana +4293:puff +4294:panama +4295:sandcastles +4296:floral +4297:little +4298:juxtaposition +4299:sphinx +4300:tonnons +4301:bark +4302:blurry +4303:tea_bag +4304:sheikh +4305:blonde +4306:wrist +4307:tobacco +4308:griffith +4309:province +4310:streaming +4311:squeeze +4312:obcomm +4313:handout +4314:wisteria +4315:toiletries +4316:masher +4317:slice +4318:musket +4319:wfp +4320:technician +4321:toucan +4322:shooting +4323:prune +4324:production +4325:fillets +4326:ameritrade +4327:azad +4328:sheaf +4329:webpage +4330:sofa_bed +4331:neptunes +4332:scotland +4333:joyful +4334:benedict +4335:science +4336:coaches +4337:ventilator +4338:alps +4339:danger,dangers +4340:section +4341:college +4342:meditating +4343:humans +4344:alfa +4345:pollution +4346:election +4347:sandwiches +4348:southwest +4349:distillery +4350:newfoundland +4351:bonita +4352:irrigation +4353:commemorates +4354:hbo +4355:user +4356:galleria +4357:sauna +4358:blacksmithing +4359:repellent +4360:evolutionfit +4361:doge +4362:kazakhstan +4363:mysteries +4364:drift +4365:trapeze +4366:language +4367:foosball +4368:electrolux +4369:choir +4370:two fish +4371:motherboard +4372:greek +4373:fiction +4374:armour +4375:blindfolds +4376:bales +4377:merlion +4378:bishop +4379:raisin +4380:district +4381:loft +4382:empire +4383:deluxe +4384:mass +4385:christie +4386:codes +4387:teamwork +4388:workings +4389:valmont +4390:shantytown +4391:netherlands +4392:employees +4393:harvester +4394:rust +4395:lounging +4396:magsafe +4397:wonder +4398:jazz +4399:density +4400:coop +4401:cleat_ +4402:kirin +4403:baguette +4404:chart +4405:carburetor +4406:latam +4407:gum +4408:staszow +4409:hsbc +4410:mantle +4411:crystal +4412:beachfront +4413:clemson +4414:khalifa +4415:dwellings +4416:hero +4417:cub,cub_,cubs +4418:academy +4419:sunsweet +4420:tots +4421:harness +4422:dressing +4423:screening +4424:coral +4425:sprint +4426:lineup +4427:hugo +4428:streetcar +4429:Verizon,verizon +4430:smoking +4431:compair +4432:launcher +4433:acquisisce +4434:graphic,graphics +4435:duct +4436:cosmos +4437:muslim +4438:pencil_box +4439:shepherd_dog +4440:table-tennis +4441:hamas +4442:kremlin +4443:spin +4444:totos +4445:peugeot +4446:pointing +4447:ces +4448:functionality +4449:combat +4450:muzzle +4451:vapormax +4452:order +4453:adultery +4454:veganas +4455:douro +4456:Sainsburys +4457:anna +4458:transaction +4459:john +4460:handrail +4461:footballer +4462:travelodge +4463:maxx +4464:posing +4465:death +4466:26bn +4467:standing +4468:microprocessor +4469:casing +4470:tissot +4471:trading +4472:Superdrug +4473:getty +4474:wales +4475:transit +4476:cartridge +4477:pegaworld +4478:fist +4479:chainlink +4480:swinge +4481:future +4482:handmade +4483:powerball +4484:striped +4485:por +4486:care +4487:president +4488:vattenfall +4489:representation +4490:ratio +4491:birthplace +4492:homemade +4493:angkor +4494:simpsons +4495:launching +4496:meditation +4497:quick +4498:inventory +4499:ventilation +4500:circo +4501:packaging +4502:zone +4503:steep +4504:palanquin +4505:bleak +4506:pastries +4507:citigroup +4508:monopoly +4509:romania +4510:workplace +4511:selfie +4512:sus +4513:karl +4514:thehindu +4515:feel +4516:carbon +4517:mizuno +4518:shenzhen +4519:thumbs +4520:lundunata +4521:matters +4522:pita_ +4523:lingerie 
+4524:cinnamon +4525:karate +4526:racquet +4527:leader +4528:hershey +4529:usda +4530:blower +4531:semi +4532:grind +4533:religious +4534:colonnades +4535:diamond +4536:snowflake +4537:paddleboard +4538:empty +4539:macbook +4540:hypercar +4541:cliffside +4542:moisturizer +4543:pilgrimage +4544:various +4545:guacamole +4546:wellness +4547:jakarta +4548:picker +4549:rejection +4550:album,albums +4551:win +4552:noccibe +4553:sideline +4554:arizona +4555:banister +4556:tapawera +4557:symphony +4558:bistro +4559:dick +4560:Tastes +4561:hydro +4562:majority +4563:mbc +4564:snowball +4565:engie +4566:nasdaq +4567:Oneplus +4568:sandisk +4569:bowling +4570:ukraine +4571:swells +4572:macaroni +4573:act +4574:blow +4575:fuel +4576:humayun +4577:woodworking +4578:magician +4579:enfield +4580:lotterias +4581:lg +4582:europa +4583:furrows +4584:kiev +4585:strasbourg +4586:hello +4587:greece +4588:handling +4589:distancing +4590:feast +4591:tribe +4592:loop,loops +4593:climber +4594:mri +4595:shipyard +4596:index +4597:manuscript +4598:vegas +4599:hummingbird +4600:malay +4601:vaporizer +4602:spooky +4603:swarm +4604:microsystems +4605:bedpan +4606:reporter +4607:palestine +4608:handicap +4609:hardtop +4610:decathlon +4611:birkenau +4612:guinness +4613:donnarumma +4614:servicepoint +4615:cardinals +4616:fisheye +4617:dip +4618:michigan +4619:defibrillator +4620:destruction +4621:processing +4622:brawl +4623:rockstar +4624:algae +4625:wargame +4626:pont +4627:bouncy +4628:throw +4629:campaign +4630:opulent +4631:multimeter +4632:gps +4633:discount +4634:climbing +4635:weight,weights +4636:technologies +4637:tasty +4638:neglect +4639:savanna +4640:mariachi +4641:guests +4642:iowa +4643:goalie +4644:ages +4645:grocers +4646:wealth +4647:cranberries +4648:nugget +4649:drumming +4650:clash +4651:turquoise +4652:makita +4653:kiss +4654:express +4655:petrofac +4656:engraving +4657:karcher +4658:pikachu +4659:cvs +4660:pug +4661:gymnasium +4662:moto +4663:gardener +4664:total +4665:hawk +4666:stix +4667:ios +4668:decay +4669:Macarons,macarons +4670:pavers +4671:shoppee +4672:crumbling +4673:evangelists +4674:chefchaouen +4675:poodle +4676:reliance +4677:lying +4678:filtering +4679:specialties +4680:itunes +4681:processor +4682:imposing +4683:riyal +4684:tilework +4685:sunshine +4686:shaggy +4687:ruffle +4688:obstruction +4689:carcass +4690:smartwatches +4691:opulence +4692:departure +4693:bolo +4694:hives +4695:badger +4696:success +4697:shoal +4698:boss +4699:hazards +4700:sonobuoyo +4701:polish +4702:avvala +4703:pittsburgh +4704:kayakers +4705:wildfire +4706:domain +4707:festivities +4708:animation +4709:immigration +4710:progress +4711:triangle +4712:waffle_iron +4713:watercraft +4714:blowing +4715:gouda +4716:emirates +4717:berlin +4718:traditional +4719:purchasing +4720:saudi +4721:bomb +4722:winners +4723:tactics +4724:amor +4725:flour +4726:wordpress +4727:grow +4728:echo +4729:vote +4730:furnishings +4731:rigener +4732:page +4733:rower +4734:eclipse +4735:abbey +4736:dye +4737:zip +4738:quiz +4739:diagram +4740:chimpanzee +4741:f430 +4742:underground +4743:father +4744:garbanzo +4745:newlywed +4746:saturn +4747:terracotta +4748:hoover +4749:coronavirus +4750:excavation +4751:trio +4752:elements +4753:spain +4754:burman +4755:geese +4756:remodel +4757:collapse +4758:navy +4759:everton +4760:renasance +4761:testing +4762:elegant +4763:currencies +4764:medallion +4765:menara +4766:font +4767:liner +4768:lattice +4769:tabasco +4770:viewer +4771:spelling +4772:continent +4773:charge +4774:attendees 
+4775:velodrome +4776:finlandia +4777:arrowhead +4778:grits +4779:heliift +4780:tee +4781:diving +4782:daffodil,daffodils +4783:eos +4784:sprout +4785:avenue +4786:kaaba +4787:fallen +4788:encryption +4789:beatles +4790:plantagen +4791:switzerland +4792:giant +4793:volksbank +4794:glitter +4795:puncher +4796:eat +4797:tesco +4798:virginia +4799:todaiji +4800:muellengo +4801:longines +4802:soundlink +4803:aroma +4804:high +4805:depiction +4806:pay +4807:bbc +4808:education +4809:law +4810:overlook,overlooks +4811:tutu +4812:oia +4813:granola +4814:vale +4815:aerial +4816:haze +4817:bangs +4818:burial +4819:bluebells +4820:greyhound +4821:norway +4822:colgate +4823:guides +4824:diversity +4825:- up +4826:vane +4827:stonex +4828:montevideo +4829:wattle +4830:gembong +4831:opinions +4832:voice +4833:sesame +4834:hanjin +4835:finland +4836:tropic +4837:responsibility +4838:colossion +4839:expenses +4840:auckland +4841:whisk +4842:sake +4843:athleticism +4844:recliner +4845:lincoln +4846:planner +4847:parangriti +4848:conservation +4849:rothenburg +4850:dwarfs +4851:political +4852:papa +4853:a rose +4854:hot +4855:vapor +4856:phuket +4857:beaver +4858:kingdom +4859:cuckoo +4860:rafting +4861:clap +4862:walk - in +4863:pulp +4864:archers +4865:broadcast +4866:frappuccino +4867:server +4868:motorrad +4869:composition +4870:gel +4871:bunnings +4872:haunting +4873:heroes +4874:share +4875:apparel +4876:conditioning +4877:explosion +4878:mustang +4879:colone +4880:aviation +4881:value +4882:shadowy +4883:nots +4884:collector +4885:england +4886:clutter +4887:gag +4888:itza +4889:commission +4890:betting +4891:nasa +4892:chassis +4893:login +4894:savings +4895:punt +4896:rani +4897:cellist +4898:cacti +4899:commitment +4900:paddleboat +4901:henna +4902:topping +4903:kathmandu +4904:hijabs +4905:rubik +4906:smart +4907:korea +4908:netting +4909:oracle +4910:strength +4911:carve +4912:fendi +4913:hudson +4914:fairway +4915:icicles +4916:firenze +4917:supporter +4918:wherever +4919:groceries +4920:aldi +4921:taipei +4922:freshness +4923:kind +4924:congo +4925:mulberry +4926:cambridge +4927:pacific +4928:recreation +4929:scouts +4930:mouthwash +4931:streetlamp +4932:marshmallow +4933:california +4934:reebok +4935:halfords +4936:signature +4937:oxford +4938:abu +4939:spark,sparks +4940:boi +4941:flow +4942:barley +4943:showhome +4944:artikel +4945:fleur +4946:purolator +4947:spicy +4948:length +4949:poignant +4950:dart +4951:milbone +4952:sunbeam +4953:strip +4954:kors +4955:missoni +4956:geyser +4957:poolside +4958:look +4959:sheeting +4960:watches +4961:mantellassi +4962:guaraná +4963:brandy +4964:malfunction +4965:grapevines +4966:pope +4967:indo +4968:wildflower +4969:remains +4970:hieroglyphs +4971:canilava +4972:species +4973:trump +4974:manor +4975:handprint +4976:trailhead +4977:hopscotch +4978:david +4979:singing +4980:shanghai +4981:mangoes +4982:hue,hues +4983:segovia +4984:stryker +4985:mrt +4986:fin +4987:newborn +4988:seater +4989:maersk +4990:thunder +4991:cbc +4992:anthologies +4993:rockefeller +4994:epic +4995:sealand +4996:stewart +4997:recreational +4998:asics +4999:wipes +5000:cleveland +5001:chance +5002:moai +5003:efficient +5004:title +5005:beet +5006:renaissance +5007:maison +5008:leak +5009:lira +5010:responders +5011:indomaret +5012:headline +5013:datar +5014:promotion +5015:mark,marks +5016:talkie +5017:lagerfeld +5018:popularity +5019:scorpion +5020:Paragliders,paragliders +5021:trout +5022:kintex +5023:lumia +5024:e - plus +5025:learning +5026:bnp +5027:management 
+5028:türkenstraße +5029:bain +5030:ukulele +5031:crepe +5032:embankment +5033:allexpress +5034:meizu +5035:m6s +5036:sandbox +5037:francisco +5038:loading +5039:launch +5040:initiative +5041:huntsman +5042:resurfacing +5043:handshaking +5044:swatch +5045:iff +5046:blackpool +5047:overcoat +5048:shutterstock +5049:taman +5050:descriptions +5051:gondoliers +5052:wafer +5053:rabbi +5054:hyper +5055:damp +5056:l'oreal +5057:okra +5058:Superheroes +5059:unfold +5060:tequila +5061:celo +5062:milan +5063:sculptor +5064:sprayer +5065:carp +5066:maneuvers +5067:smartlab +5068:blog +5069:breguet +5070:ace +5071:corona +5072:alldays +5073:kashmiri +5074:entranceway +5075:yes +5076:role +5077:kuwait +5078:styling +5079:Watsons +5080:dermalow +5081:illusion +5082:bulgaria +5083:delicate +5084:peppa +5085:speech +5086:saree +5087:gmc +5088:nook +5089:basil +5090:dishsoap +5091:amd +5092:ashes +5093:fishermen +5094:mississippi +5095:magnificent +5096:tipper +5097:furnace +5098:confederate +5099:neck +5100:airbus +5101:concerns +5102:porter +5103:butterflies +5104:briefs +5105:azadi +5106:nfl +5107:steamboat +5108:sector +5109:quesadilla +5110:weaving +5111:muscat +5112:canvas +5113:notification +5114:par +5115:minion +5116:factories +5117:crunchy +5118:barbershops +5119:infield +5120:collectibles +5121:outcropping +5122:courtroom +5123:canary +5124:walrus +5125:wework +5126:donald +5127:stallhofen +5128:crock +5129:gap +5130:focus +5131:software +5132:mardi +5133:der +5134:tata +5135:tik +5136:chuck +5137:crumble +5138:coliseum +5139:sunburst +5140:swastika +5141:gods +5142:kellogg +5143:vet +5144:deezer +5145:rochelinho +5146:shining +5147:kneeling +5148:bolo_tie +5149:minh +5150:huggies +5151:dawn +5152:release +5153:jousting +5154:housing +5155:accessories +5156:cheeseburger +5157:feeder +5158:latte +5159:cartier +5160:nintendo +5161:hardback +5162:petroglyphs +5163:marine +5164:elegance +5165:glassblowing +5166:c +5167:sawdust +5168:brexit +5169:azz +5170:vista +5171:cob +5172:depo +5173:past +5174:des +5175:moneygram +5176:protection +5177:pinball +5178:paycheck +5179:crevice +5180:neymar +5181:putt +5182:washing +5183:alipay +5184:friendship +5185:cordless +5186:diy +5187:communist +5188:daimler +5189:handicrafts +5190:teleferic +5191:totoro +5192:eps +5193:trends +5194:mercadona +5195:clubhouse +5196:discussion +5197:bonobo +5198:azalea +5199:lycee +5200:currant +5201:gopro +5202:silk +5203:format +5204:expression +5205:gala +5206:versatility +5207:horseshoe +5208:divider +5209:enforcement +5210:pickett +5211:costco +5212:stalactites +5213:assange +5214:alcatraz +5215:valor +5216:autodesk +5217:macau +5218:barracks +5219:emporio +5220:registration +5221:philadelphia +5222:bottlega +5223:cockatoo +5224:bravery +5225:citroen +5226:wisma +5227:guinea +5228:biking +5229:firemen +5230:mobis +5231:nesco +5232:lavandes +5233:riding +5234:puma +5235:stretches +5236:batteries +5237:dazs +5238:francis +5239:swordfish +5240:oculus +5241:bottom +5242:reserve +5243:ho +5244:account +5245:deltata +5246:beige +5247:coles +5248:cravings +5249:sonic +5250:davidson +5251:dryers +5252:rescuer diff --git a/mask_adapter/data/datasets/load_sem_seg.py b/mask_adapter/data/datasets/load_sem_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..10967b27b74cd7ab94766053bb349077e62059b8 --- /dev/null +++ b/mask_adapter/data/datasets/load_sem_seg.py @@ -0,0 +1,90 @@ + + +import contextlib +import datetime +import os +import logging +import numpy as np +from PIL import Image + +from 
detectron2.utils.file_io import PathManager
+
+
+
+logger = logging.getLogger(__name__)
+
+def load_sem_seg(gt_root, image_root, gt_ext="png", image_ext="jpg", meta=None):
+    """
+    Load semantic segmentation datasets. All files under "gt_root" with "gt_ext" extension are
+    treated as ground truth annotations and all files under "image_root" with "image_ext" extension
+    as input images. Ground truth and input images are matched using file paths relative to
+    "gt_root" and "image_root" respectively, without taking into account file extensions.
+    This works for COCO as well as some other datasets.
+
+    Args:
+        gt_root (str): full path to ground truth semantic segmentation files. Semantic segmentation
+            annotations are stored as images with integer values in pixels that represent
+            corresponding semantic labels.
+        image_root (str): the directory where the input images are.
+        gt_ext (str): file extension for ground truth annotations.
+        image_ext (str): file extension for input images.
+
+    Returns:
+        list[dict]:
+            a list of dicts in detectron2 standard format without instance-level
+            annotation.
+
+    Notes:
+        1. This function does not read the image and ground truth files.
+           The results do not have the "image" and "sem_seg" fields.
+    """
+
+    # We match input images with ground truth based on their relative filepaths (without file
+    # extensions) starting from 'image_root' and 'gt_root' respectively.
+    def file2id(folder_path, file_path):
+        # extract relative path starting from `folder_path`
+        image_id = os.path.normpath(os.path.relpath(file_path, start=folder_path))
+        # remove file extension
+        image_id = os.path.splitext(image_id)[0]
+        return image_id
+
+    input_files = sorted(
+        (os.path.join(image_root, f) for f in PathManager.ls(image_root) if f.endswith(image_ext)),
+        key=lambda file_path: file2id(image_root, file_path),
+    )
+    gt_files = sorted(
+        (os.path.join(gt_root, f) for f in PathManager.ls(gt_root) if f.endswith(gt_ext)),
+        key=lambda file_path: file2id(gt_root, file_path),
+    )
+
+    assert len(gt_files) > 0, "No annotations found in {}.".format(gt_root)
+
+    # Use the intersection, so that val2017_100 annotations can run smoothly with val2017 images
+    if len(input_files) != len(gt_files):
+        logger.warning(
+            "Directories {} and {} have {} and {} files, respectively.".format(
+                image_root, gt_root, len(input_files), len(gt_files)
+            )
+        )
+        input_basenames = [os.path.basename(f)[: -len(image_ext)] for f in input_files]
+        gt_basenames = [os.path.basename(f)[: -len(gt_ext)] for f in gt_files]
+        intersect = list(set(input_basenames) & set(gt_basenames))
+        # sort, otherwise each worker may obtain a list[dict] in different order
+        intersect = sorted(intersect)
+        logger.warning("Will use their intersection of {} files.".format(len(intersect)))
+        input_files = [os.path.join(image_root, f + image_ext) for f in intersect]
+        gt_files = [os.path.join(gt_root, f + gt_ext) for f in intersect]
+
+    logger.info(
+        "Loaded {} images with semantic segmentation from {}".format(len(input_files), image_root)
+    )
+
+    dataset_dicts = []
+    for (img_path, gt_path) in zip(input_files, gt_files):
+        record = {}
+        record["file_name"] = img_path
+        record["sem_seg_file_name"] = gt_path
+        record["meta"] = meta
+        dataset_dicts.append(record)
+
+    return dataset_dicts
\ No newline at end of file
diff --git a/mask_adapter/data/datasets/lvis_1203_with_prompt_eng.txt b/mask_adapter/data/datasets/lvis_1203_with_prompt_eng.txt
new file mode 100644
index 0000000000000000000000000000000000000000..db80237b5fffcff3b91b6f917f518f14103bff3a
---
/dev/null +++ b/mask_adapter/data/datasets/lvis_1203_with_prompt_eng.txt @@ -0,0 +1,1203 @@ +1:aerosol can,spray can +2:air conditioner +3:airplane,aeroplane +4:alarm clock +5:alcohol,alcoholic beverage +6:alligator,gator +7:almond +8:ambulance +9:amplifier +10:anklet,ankle bracelet +11:antenna,aerial,transmitting aerial +12:apple +13:applesauce +14:apricot +15:apron +16:aquarium,fish tank +17:arctic (type of shoe),galosh,golosh,rubber (type of shoe),gumshoe +18:armband +19:armchair +20:armoire +21:armor,armour +22:artichoke +23:trash can,garbage can,wastebin,dustbin,trash barrel,trash bin +24:ashtray +25:asparagus +26:atomizer,atomiser,spray,sprayer,nebulizer,nebuliser +27:avocado +28:award,accolade +29:awning +30:ax,axe +31:baboon +32:baby buggy,baby carriage,perambulator,pram,stroller +33:basketball backboard +34:backpack,knapsack,packsack,rucksack,haversack +35:handbag,purse,pocketbook +36:suitcase,baggage,luggage +37:bagel,beigel +38:bagpipe +39:baguet,baguette +40:bait,lure +41:ball +42:ballet skirt,tutu +43:balloon +44:bamboo +45:banana +46:Band Aid +47:bandage +48:bandanna,bandana +49:banjo +50:banner,streamer +51:barbell +52:barge +53:barrel,cask +54:barrette +55:barrow,garden cart,lawn cart,wheelbarrow +56:baseball base +57:baseball +58:baseball bat +59:baseball cap,jockey cap,golf cap +60:baseball glove,baseball mitt +61:basket,handbasket +62:basketball +63:bass horn,sousaphone,tuba +64:bat (animal) +65:bath mat +66:bath towel +67:bathrobe +68:bathtub,bathing tub +69:batter (food) +70:battery +71:beachball +72:bead +73:bean curd,tofu +74:beanbag +75:beanie,beany +76:bear +77:bed +78:bedpan +79:bedspread,bedcover,bed covering,counterpane,spread +80:cow +81:beef (food),boeuf (food) +82:beeper,pager +83:beer bottle +84:beer can +85:beetle +86:bell +87:bell pepper,capsicum +88:belt +89:belt buckle +90:bench +91:beret +92:bib +93:Bible +94:bicycle,bike (bicycle) +95:visor,vizor +96:billboard +97:binder,ring-binder +98:binoculars,field glasses,opera glasses +99:bird +100:birdfeeder +101:birdbath +102:birdcage +103:birdhouse +104:birthday cake +105:birthday card +106:pirate flag +107:black sheep +108:blackberry +109:blackboard,chalkboard +110:blanket +111:blazer,sport jacket,sport coat,sports jacket,sports coat +112:blender,liquidizer,liquidiser +113:blimp +114:blinker,flasher +115:blouse +116:blueberry +117:gameboard +118:boat,ship (boat) +119:bob,bobber,bobfloat +120:bobbin,spool,reel +121:bobby pin,hairgrip +122:boiled egg,coddled egg +123:bolo tie,bolo,bola tie,bola +124:deadbolt +125:bolt +126:bonnet +127:book +128:bookcase +129:booklet,brochure,leaflet,pamphlet +130:bookmark,bookmarker +131:boom microphone,microphone boom +132:boot +133:bottle +134:bottle opener +135:bouquet +136:bow (weapon) +137:bow (decorative ribbons) +138:bow-tie,bowtie +139:bowl +140:pipe bowl +141:bowler hat,bowler,derby hat,derby,plug hat +142:bowling ball +143:box +144:boxing glove +145:suspenders +146:bracelet,bangle +147:brass plaque +148:brassiere,bra,bandeau +149:bread-bin,breadbox +150:bread +151:breechcloth,breechclout,loincloth +152:bridal gown,wedding gown,wedding dress +153:briefcase +154:broccoli +155:broach +156:broom +157:brownie +158:brussels sprouts +159:bubble gum +160:bucket,pail +161:horse buggy +162:horned cow +163:bulldog +164:bulldozer,dozer +165:bullet train +166:bulletin board,notice board +167:bulletproof vest +168:bullhorn,megaphone +169:bun,roll +170:bunk bed +171:buoy +172:burrito +173:bus (vehicle),autobus,charabanc,double-decker,motorbus,motorcoach +174:business card 
+175:butter +176:butterfly +177:button +178:cab (taxi),taxi,taxicab +179:cabana +180:cabin car,caboose +181:cabinet +182:locker,storage locker +183:cake +184:calculator +185:calendar +186:calf +187:camcorder +188:camel +189:camera +190:camera lens +191:camper (vehicle),camping bus,motor home +192:can,tin can +193:can opener,tin opener +194:candle,candlestick +195:candle holder +196:candy bar +197:candy cane +198:walking cane +199:canister,cannister +200:canoe +201:cantaloup,cantaloupe +202:canteen +203:cap (headwear) +204:bottle cap,cap (container lid) +205:cape +206:cappuccino,coffee cappuccino +207:car (automobile),auto (automobile),automobile +208:railcar (part of a train),railway car (part of a train),railroad car (part of a train) +209:elevator car +210:car battery,automobile battery +211:identity card +212:card +213:cardigan +214:cargo ship,cargo vessel +215:carnation +216:horse carriage +217:carrot +218:tote bag +219:cart +220:carton +221:cash register,register (for cash transactions) +222:casserole +223:cassette +224:cast,plaster cast,plaster bandage +225:cat +226:cauliflower +227:cayenne (spice),cayenne pepper (spice),red pepper (spice) +228:CD player +229:celery +230:cellular telephone,cellular phone,cellphone,mobile phone,smart phone +231:chain mail,ring mail,chain armor,chain armour,ring armor,ring armour +232:chair +233:chaise longue,chaise,daybed +234:chalice +235:chandelier +236:chap +237:checkbook,chequebook +238:checkerboard +239:cherry +240:chessboard +241:chicken (animal) +242:chickpea,garbanzo +243:chili (vegetable),chili pepper (vegetable),chilli (vegetable),chilly (vegetable),chile (vegetable) +244:chime,gong +245:chinaware +246:crisp (potato chip),potato chip +247:poker chip +248:chocolate bar +249:chocolate cake +250:chocolate milk +251:chocolate mousse +252:choker,collar,neckband +253:chopping board,cutting board,chopping block +254:chopstick +255:Christmas tree +256:slide +257:cider,cyder +258:cigar box +259:cigarette +260:cigarette case,cigarette pack +261:cistern,water tank +262:clarinet +263:clasp +264:cleansing agent,cleanser,cleaner +265:cleat (for securing rope) +266:clementine +267:clip +268:clipboard +269:clippers (for plants) +270:cloak +271:clock,timepiece,timekeeper +272:clock tower +273:clothes hamper,laundry basket,clothes basket +274:clothespin,clothes peg +275:clutch bag +276:coaster +277:coat +278:coat hanger,clothes hanger,dress hanger +279:coatrack,hatrack +280:cock,rooster +281:cockroach +282:cocoa (beverage),hot chocolate (beverage),drinking chocolate +283:coconut,cocoanut +284:coffee maker,coffee machine +285:coffee table,cocktail table +286:coffeepot +287:coil +288:coin +289:colander,cullender +290:coleslaw,slaw +291:coloring material,colouring material +292:combination lock +293:pacifier,teething ring +294:comic book +295:compass +296:computer keyboard,keyboard (computer) +297:condiment +298:cone,traffic cone +299:control,controller +300:convertible (automobile) +301:sofa bed +302:cooker +303:cookie,cooky,biscuit (cookie) +304:cooking utensil +305:cooler (for food),ice chest +306:cork (bottle plug),bottle cork +307:corkboard +308:corkscrew,bottle screw +309:edible corn,corn,maize +310:cornbread +311:cornet,horn,trumpet +312:cornice,valance,valance board,pelmet +313:cornmeal +314:corset,girdle +315:costume +316:cougar,puma,catamount,mountain lion,panther +317:coverall +318:cowbell +319:cowboy hat,ten-gallon hat +320:crab (animal) +321:crabmeat +322:cracker +323:crape,crepe,French pancake +324:crate +325:crayon,wax crayon +326:cream pitcher 
+327:crescent roll,croissant +328:crib,cot +329:crock pot,earthenware jar +330:crossbar +331:crouton +332:crow +333:crowbar,wrecking bar,pry bar +334:crown +335:crucifix +336:cruise ship,cruise liner +337:police cruiser,patrol car,police car,squad car +338:crumb +339:crutch +340:cub (animal) +341:cube,square block +342:cucumber,cuke +343:cufflink +344:cup +345:trophy cup +346:cupboard,closet +347:cupcake +348:hair curler,hair roller,hair crimper +349:curling iron +350:curtain,drapery +351:cushion +352:cylinder +353:cymbal +354:dagger +355:dalmatian +356:dartboard +357:date (fruit) +358:deck chair,beach chair +359:deer,cervid +360:dental floss,floss +361:desk +362:detergent +363:diaper +364:diary,journal +365:die,dice +366:dinghy,dory,rowboat +367:dining table +368:tux,tuxedo +369:dish +370:dish antenna +371:dishrag,dishcloth +372:dishtowel,tea towel +373:dishwasher,dishwashing machine +374:dishwasher detergent,dishwashing detergent,dishwashing liquid,dishsoap +375:dispenser +376:diving board +377:Dixie cup,paper cup +378:dog +379:dog collar +380:doll +381:dollar,dollar bill,one dollar bill +382:dollhouse,doll's house +383:dolphin +384:domestic ass,donkey +385:doorknob,doorhandle +386:doormat,welcome mat +387:doughnut,donut +388:dove +389:dragonfly +390:drawer +391:underdrawers,boxers,boxershorts +392:dress,frock +393:dress hat,high hat,opera hat,silk hat,top hat +394:dress suit +395:dresser +396:drill +397:drone +398:dropper,eye dropper +399:drum (musical instrument) +400:drumstick +401:duck +402:duckling +403:duct tape +404:duffel bag,duffle bag,duffel,duffle +405:dumbbell +406:dumpster +407:dustpan +408:eagle +409:earphone,earpiece,headphone +410:earplug +411:earring +412:easel +413:eclair +414:eel +415:egg,eggs +416:egg roll,spring roll +417:egg yolk,yolk (egg) +418:eggbeater,eggwhisk +419:eggplant,aubergine +420:electric chair +421:refrigerator +422:elephant +423:elk,moose +424:envelope +425:eraser +426:escargot +427:eyepatch +428:falcon +429:fan +430:faucet,spigot,tap +431:fedora +432:ferret +433:Ferris wheel +434:ferry,ferryboat +435:fig (fruit) +436:fighter jet,fighter aircraft,attack aircraft +437:figurine +438:file cabinet,filing cabinet +439:file (tool) +440:fire alarm,smoke alarm +441:fire engine,fire truck +442:fire extinguisher,extinguisher +443:fire hose +444:fireplace +445:fireplug,fire hydrant,hydrant +446:first-aid kit +447:fish +448:fish (food) +449:fishbowl,goldfish bowl +450:fishing rod,fishing pole +451:flag +452:flagpole,flagstaff +453:flamingo +454:flannel +455:flap +456:flash,flashbulb +457:flashlight,torch +458:fleece +459:flip-flop (sandal) +460:flipper (footwear),fin (footwear) +461:flower arrangement,floral arrangement +462:flute glass,champagne flute +463:foal +464:folding chair +465:food processor +466:football (American) +467:football helmet +468:footstool,footrest +469:fork +470:forklift +471:freight car +472:French toast +473:freshener,air freshener +474:frisbee +475:frog,toad,toad frog +476:fruit juice +477:frying pan,frypan,skillet +478:fudge +479:funnel +480:futon +481:gag,muzzle +482:garbage +483:garbage truck +484:garden hose +485:gargle,mouthwash +486:gargoyle +487:garlic,ail +488:gasmask,respirator,gas helmet +489:gazelle +490:gelatin,jelly +491:gemstone +492:generator +493:giant panda,panda,panda bear +494:gift wrap +495:ginger,gingerroot +496:giraffe +497:cincture,sash,waistband,waistcloth +498:glass (drink container),drinking glass +499:globe +500:glove +501:goat +502:goggles +503:goldfish +504:golf club,golf-club +505:golfcart +506:gondola (boat) 
+507:goose +508:gorilla +509:gourd +510:grape +511:grater +512:gravestone,headstone,tombstone +513:gravy boat,gravy holder +514:green bean +515:green onion,spring onion,scallion +516:griddle +517:grill,grille,grillwork,radiator grille +518:grits,hominy grits +519:grizzly,grizzly bear +520:grocery bag +521:guitar +522:gull,seagull +523:gun +524:hairbrush +525:hairnet +526:hairpin +527:halter top +528:ham,jambon,gammon +529:hamburger,beefburger,burger +530:hammer +531:hammock +532:hamper +533:hamster +534:hair dryer +535:hand glass,hand mirror +536:hand towel,face towel +537:handcart,pushcart,hand truck +538:handcuff +539:handkerchief +540:handle,grip,handgrip +541:handsaw,carpenter's saw +542:hardback book,hardcover book +543:harmonium,organ (musical instrument),reed organ (musical instrument) +544:hat +545:hatbox +546:veil +547:headband +548:headboard +549:headlight,headlamp +550:headscarf +551:headset +552:headstall (for horses),headpiece (for horses) +553:heart +554:heater,warmer +555:helicopter +556:helmet +557:heron +558:highchair,feeding chair +559:hinge +560:hippopotamus +561:hockey stick +562:hog,pig +563:home plate (baseball),home base (baseball) +564:honey +565:fume hood,exhaust hood +566:hook +567:hookah,narghile,nargileh,sheesha,shisha,water pipe +568:hornet +569:horse +570:hose,hosepipe +571:hot-air balloon +572:hotplate +573:hot sauce +574:hourglass +575:houseboat +576:hummingbird +577:hummus,humus,hommos,hoummos,humous +578:polar bear +579:icecream +580:popsicle +581:ice maker +582:ice pack,ice bag +583:ice skate +584:igniter,ignitor,lighter +585:inhaler,inhalator +586:iPod +587:iron (for clothing),smoothing iron (for clothing) +588:ironing board +589:jacket +590:jam +591:jar +592:jean,blue jean,denim +593:jeep,landrover +594:jelly bean,jelly egg +595:jersey,T-shirt,tee shirt +596:jet plane,jet-propelled plane +597:jewel,gem,precious stone +598:jewelry,jewellery +599:joystick +600:jumpsuit +601:kayak +602:keg +603:kennel,doghouse +604:kettle,boiler +605:key +606:keycard +607:kilt +608:kimono +609:kitchen sink +610:kitchen table +611:kite +612:kitten,kitty +613:kiwi fruit +614:knee pad +615:knife +616:knitting needle +617:knob +618:knocker (on a door),doorknocker +619:koala,koala bear +620:lab coat,laboratory coat +621:ladder +622:ladle +623:ladybug,ladybeetle,ladybird beetle +624:lamb (animal) +625:lamb-chop,lambchop +626:lamp +627:lamppost +628:lampshade +629:lantern +630:lanyard,laniard +631:laptop computer,notebook computer +632:lasagna,lasagne +633:latch +634:lawn mower +635:leather +636:legging (clothing),leging (clothing),leg covering +637:Lego,Lego set +638:legume +639:lemon +640:lemonade +641:lettuce +642:license plate,numberplate +643:life buoy,lifesaver,life belt,life ring +644:life jacket,life vest +645:lightbulb +646:lightning rod,lightning conductor +647:lime +648:limousine +649:lion +650:lip balm +651:liquor,spirits,hard liquor,liqueur,cordial +652:lizard +653:log +654:lollipop +655:speaker (stero equipment) +656:loveseat +657:machine gun +658:magazine +659:magnet +660:mail slot +661:mailbox (at home),letter box (at home) +662:mallard +663:mallet +664:mammoth +665:manatee +666:mandarin orange +667:manger,trough +668:manhole +669:map +670:marker +671:martini +672:mascot +673:mashed potato +674:masher +675:mask,facemask +676:mast +677:mat (gym equipment),gym mat +678:matchbox +679:mattress +680:measuring cup +681:measuring stick,ruler (measuring stick),measuring rod +682:meatball +683:medicine +684:melon +685:microphone +686:microscope +687:microwave oven 
+688:milestone,milepost +689:milk +690:milk can +691:milkshake +692:minivan +693:mint candy +694:mirror +695:mitten +696:mixer (kitchen tool),stand mixer +697:money +698:monitor (computer equipment) computer monitor +699:monkey +700:motor +701:motor scooter,scooter +702:motor vehicle,automotive vehicle +703:motorcycle +704:mound (baseball),pitcher's mound +705:mouse (computer equipment),computer mouse +706:mousepad +707:muffin +708:mug +709:mushroom +710:music stool,piano stool +711:musical instrument,instrument (musical) +712:nailfile +713:napkin,table napkin,serviette +714:neckerchief +715:necklace +716:necktie,tie (necktie) +717:needle +718:nest +719:newspaper,paper (newspaper) +720:newsstand +721:nightshirt,nightwear,sleepwear,nightclothes +722:nosebag (for animals),feedbag +723:noseband (for animals),nosepiece (for animals) +724:notebook +725:notepad +726:nut +727:nutcracker +728:oar +729:octopus (food) +730:octopus (animal) +731:oil lamp,kerosene lamp,kerosine lamp +732:olive oil +733:omelet,omelette +734:onion +735:orange (fruit) +736:orange juice +737:ostrich +738:ottoman,pouf,pouffe,hassock +739:oven +740:overalls (clothing) +741:owl +742:packet +743:inkpad,inking pad,stamp pad +744:pad +745:paddle,boat paddle +746:padlock +747:paintbrush +748:painting +749:pajamas,pyjamas +750:palette,pallet +751:pan (for cooking),cooking pan +752:pan (metal container) +753:pancake +754:pantyhose +755:papaya +756:paper plate +757:paper towel +758:paperback book,paper-back book,softback book,soft-cover book +759:paperweight +760:parachute +761:parakeet,parrakeet,parroket,paraquet,paroquet,parroquet +762:parasail (sports) +763:parasol,sunshade +764:parchment +765:parka,anorak +766:parking meter +767:parrot +768:passenger car (part of a train),coach (part of a train) +769:passenger ship +770:passport +771:pastry +772:patty (food) +773:pea (food) +774:peach +775:peanut butter +776:pear +777:peeler (tool for fruit and vegetables) +778:wooden leg,pegleg +779:pegboard +780:pelican +781:pen +782:pencil +783:pencil box,pencil case +784:pencil sharpener +785:pendulum +786:penguin +787:pennant +788:penny (coin) +789:pepper,peppercorn +790:pepper mill,pepper grinder +791:perfume +792:persimmon +793:person,baby,child,boy,girl,man,woman,human +794:pet +795:pew (church bench),church bench +796:phonebook,telephone book,telephone directory +797:phonograph record,phonograph recording,record (phonograph recording) +798:piano +799:pickle +800:pickup truck +801:pie +802:pigeon +803:piggy bank,penny bank +804:pillow +805:pin (non jewelry) +806:pineapple +807:pinecone +808:ping-pong ball +809:pinwheel +810:tobacco pipe +811:pipe,piping +812:pistol,handgun +813:pita (bread),pocket bread +814:pitcher (vessel for liquid),ewer +815:pitchfork +816:pizza +817:place mat +818:plate +819:platter +820:playpen +821:pliers,plyers +822:plow (farm equipment),plough (farm equipment) +823:plume +824:pocket watch +825:pocketknife +826:poker (fire stirring tool),stove poker,fire hook +827:pole,post +828:polo shirt,sport shirt +829:poncho +830:pony +831:pool table,billiard table,snooker table +832:pop (soda),soda (pop),tonic,soft drink +833:postbox (public),mailbox (public) +834:postcard,postal card,mailing-card +835:poster,placard +836:pot +837:flowerpot +838:potato +839:potholder +840:pottery,clayware +841:pouch +842:power shovel,excavator,digger +843:prawn,shrimp +844:pretzel +845:printer,printing machine +846:projectile (weapon),missile +847:projector +848:propeller,propellor +849:prune +850:pudding +851:puffer 
(fish),pufferfish,blowfish,globefish +852:puffin +853:pug-dog +854:pumpkin +855:puncher +856:puppet,marionette +857:puppy +858:quesadilla +859:quiche +860:quilt,comforter +861:rabbit +862:race car,racing car +863:racket,racquet +864:radar +865:radiator +866:radio receiver,radio set,radio,tuner (radio) +867:radish,daikon +868:raft +869:rag doll +870:raincoat,waterproof jacket +871:ram (animal) +872:raspberry +873:rat +874:razorblade +875:reamer (juicer),juicer,juice reamer +876:rearview mirror +877:receipt +878:recliner,reclining chair,lounger (chair) +879:record player,phonograph (record player),turntable +880:reflector +881:remote control +882:rhinoceros +883:rib (food) +884:rifle +885:ring +886:river boat +887:road map +888:robe +889:rocking chair +890:rodent +891:roller skate +892:Rollerblade +893:rolling pin +894:root beer +895:router (computer equipment) +896:rubber band,elastic band +897:runner (carpet) +898:plastic bag,paper bag +899:saddle (on an animal) +900:saddle blanket,saddlecloth,horse blanket +901:saddlebag +902:safety pin +903:sail +904:salad +905:salad plate,salad bowl +906:salami +907:salmon (fish) +908:salmon (food) +909:salsa +910:saltshaker +911:sandal (type of shoe) +912:sandwich +913:satchel +914:saucepan +915:saucer +916:sausage +917:sawhorse,sawbuck +918:saxophone +919:scale (measuring instrument) +920:scarecrow,strawman +921:scarf +922:school bus +923:scissors +924:scoreboard +925:scraper +926:screwdriver +927:scrubbing brush +928:sculpture +929:seabird,seafowl +930:seahorse +931:seaplane,hydroplane +932:seashell +933:sewing machine +934:shaker +935:shampoo +936:shark +937:sharpener +938:Sharpie +939:shaver (electric),electric shaver,electric razor +940:shaving cream,shaving soap +941:shawl +942:shears +943:sheep +944:shepherd dog,sheepdog +945:sherbert,sherbet +946:shield +947:shirt +948:shoe,sneaker (type of shoe),tennis shoe +949:shopping bag +950:shopping cart +951:short pants,shorts (clothing),trunks (clothing) +952:shot glass +953:shoulder bag +954:shovel +955:shower head +956:shower cap +957:shower curtain +958:shredder (for paper) +959:signboard +960:silo +961:sink +962:skateboard +963:skewer +964:ski +965:ski boot +966:ski parka,ski jacket +967:ski pole +968:skirt +969:skullcap +970:sled,sledge,sleigh +971:sleeping bag +972:sling (bandage),triangular bandage +973:slipper (footwear),carpet slipper (footwear) +974:smoothie +975:snake,serpent +976:snowboard +977:snowman +978:snowmobile +979:soap +980:soccer ball +981:sock +982:sofa,couch,lounge +983:softball +984:solar array,solar battery,solar panel +985:sombrero +986:soup +987:soup bowl +988:soupspoon +989:sour cream,soured cream +990:soya milk,soybean milk,soymilk +991:space shuttle +992:sparkler (fireworks) +993:spatula +994:spear,lance +995:spectacles,specs,eyeglasses,glasses +996:spice rack +997:spider +998:crawfish,crayfish +999:sponge +1000:spoon +1001:sportswear,athletic wear,activewear +1002:spotlight +1003:squid (food),calamari,calamary +1004:squirrel +1005:stagecoach +1006:stapler (stapling machine) +1007:starfish,sea star +1008:statue (sculpture) +1009:steak (food) +1010:steak knife +1011:steering wheel +1012:stepladder +1013:step stool +1014:stereo (sound system) +1015:stew +1016:stirrer +1017:stirrup +1018:stool +1019:stop sign +1020:brake light +1021:stove,kitchen stove,range (kitchen appliance),kitchen range,cooking stove +1022:strainer +1023:strap +1024:straw (for drinking),drinking straw +1025:strawberry +1026:street sign +1027:streetlight,street lamp +1028:string cheese +1029:stylus 
+1030:subwoofer +1031:sugar bowl +1032:sugarcane (plant) +1033:suit (clothing) +1034:sunflower +1035:sunglasses +1036:sunhat +1037:surfboard +1038:sushi +1039:mop +1040:sweat pants +1041:sweatband +1042:sweater +1043:sweatshirt +1044:sweet potato +1045:swimsuit,swimwear,bathing suit,swimming costume,bathing costume,swimming trunks,bathing trunks +1046:sword +1047:syringe +1048:Tabasco sauce +1049:table-tennis table,ping-pong table +1050:table +1051:table lamp +1052:tablecloth +1053:tachometer +1054:taco +1055:tag +1056:taillight,rear light +1057:tambourine +1058:army tank,armored combat vehicle,armoured combat vehicle +1059:tank (storage vessel),storage tank +1060:tank top (clothing) +1061:tape (sticky cloth or paper) +1062:tape measure,measuring tape +1063:tapestry +1064:tarp +1065:tartan,plaid +1066:tassel +1067:tea bag +1068:teacup +1069:teakettle +1070:teapot +1071:teddy bear +1072:telephone,phone,telephone set +1073:telephone booth,phone booth,call box,telephone box,telephone kiosk +1074:telephone pole,telegraph pole,telegraph post +1075:telephoto lens,zoom lens +1076:television camera,tv camera +1077:television set,tv,tv set +1078:tennis ball +1079:tennis racket +1080:tequila +1081:thermometer +1082:thermos bottle +1083:thermostat +1084:thimble +1085:thread,yarn +1086:thumbtack,drawing pin,pushpin +1087:tiara +1088:tiger +1089:tights (clothing),leotards +1090:timer,stopwatch +1091:tinfoil +1092:tinsel +1093:tissue paper +1094:toast (food) +1095:toaster +1096:toaster oven +1097:toilet +1098:toilet tissue,toilet paper,bathroom tissue +1099:tomato +1100:tongs +1101:toolbox +1102:toothbrush +1103:toothpaste +1104:toothpick +1105:cover +1106:tortilla +1107:tow truck +1108:towel +1109:towel rack,towel rail,towel bar +1110:toy +1111:tractor (farm equipment) +1112:traffic light +1113:dirt bike +1114:trailer truck,tractor trailer,trucking rig,articulated lorry,semi truck +1115:train (railroad vehicle),railroad train +1116:trampoline +1117:tray +1118:trench coat +1119:triangle (musical instrument) +1120:tricycle +1121:tripod +1122:trousers,pants (clothing) +1123:truck +1124:truffle (chocolate),chocolate truffle +1125:trunk +1126:vat +1127:turban +1128:turkey (food) +1129:turnip +1130:turtle +1131:turtleneck (clothing),polo-neck +1132:typewriter +1133:umbrella +1134:underwear,underclothes,underclothing,underpants +1135:unicycle +1136:urinal +1137:urn +1138:vacuum cleaner +1139:vase +1140:vending machine +1141:vent,blowhole,air vent +1142:vest,waistcoat +1143:videotape +1144:vinegar +1145:violin,fiddle +1146:vodka +1147:volleyball +1148:vulture +1149:waffle +1150:waffle iron +1151:wagon +1152:wagon wheel +1153:walking stick +1154:wall clock +1155:wall socket,wall plug,electric outlet,electrical outlet,outlet,electric receptacle +1156:wallet,billfold +1157:walrus +1158:wardrobe +1159:washbasin,basin (for washing),washbowl,washstand,handbasin +1160:automatic washer,washing machine +1161:watch,wristwatch +1162:water bottle +1163:water cooler +1164:water faucet,water tap,tap (water faucet) +1165:water heater,hot-water heater +1166:water jug +1167:water gun,squirt gun +1168:water scooter,sea scooter,jet ski +1169:water ski +1170:water tower +1171:watering can +1172:watermelon +1173:weathervane,vane (weathervane),wind vane +1174:webcam +1175:wedding cake,bridecake +1176:wedding ring,wedding band +1177:wet suit +1178:wheel +1179:wheelchair +1180:whipped cream +1181:whistle +1182:wig +1183:wind chime +1184:windmill +1185:window box (for plants) +1186:windshield wiper,windscreen wiper,wiper (for 
windshield/screen) +1187:windsock,air sock,air-sleeve,wind sleeve,wind cone +1188:wine bottle +1189:wine bucket,wine cooler +1190:wineglass +1191:blinder (for horses) +1192:wok +1193:wolf +1194:wooden spoon +1195:wreath +1196:wrench,spanner +1197:wristband +1198:wristlet,wrist band +1199:yacht +1200:yogurt,yoghurt,yoghourt +1201:yoke (animal equipment) +1202:zebra +1203:zucchini,courgette \ No newline at end of file diff --git a/mask_adapter/data/datasets/mapillary_vistas_with_prompt_eng.txt b/mask_adapter/data/datasets/mapillary_vistas_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..d8d5ba17ac1bbddef0605d039d012cc873118870 --- /dev/null +++ b/mask_adapter/data/datasets/mapillary_vistas_with_prompt_eng.txt @@ -0,0 +1,66 @@ +0:invalid_class_id +1:bird,birds +2:ground animal,ground animate being,dog,cat,horse,cow,sheep,zebra,giraffe +3:curb,curbs +4:fence,fences +5:guard rail +6:barrier +7:wall,walls,brick wall,stone wall,interior wall +8:bike Lane +9:crosswalk +10:curb cut +11:parking +12:pedestrian area +13:rail track +14:road +15:service lane +16:sidewalk,pavement +17:bridge +18:building,buildings +19:tunnel +20:person,child,girl,boy,woman,man,people,children,girls,boys,women,men,lady,guy,ladies,guys,clothes +21:bicyclist,bicyclists +22:motorcyclist,motorcyclists +23:other rider,rider +24:lane marking of crosswalk +25:lane marking +26:mountain,mountains +27:sand +28:sky,clouds +29:snow +30:terrain,river,sea,grass,dirt,rock +31:vegetation,tree,trees,palm tree,bushes +32:water +33:banner,streamer +34:bench,benches +35:bike rack +36:billboard,hoarding +37:catch basin +38:cctv camera,cctv +39:fire hydrant,fireplug,plug +40:junction box +41:mailbox,postbox,mailbox,letter box +42:manhole +43:phone booth,telephone booth,call box,telephone box,telephone kiosk +44:pothole +45:street light +46:pole +47:traffic sign frame +48:utility pole +49:traffic light,traffic signal,traffic lights +50:traffic sign (back),back of traffic sign,traffic sign back +51:traffic sign (front),front of traffic sign,traffic sign front +52:trash can,ashcan,garbage can,wastebin,ash bin,ash-bin,ashbin,dustbin,trash barrel,trash bin +53:bicycle,bike +54:boat +55:bus,autobus,double-decker,jitney,motorbus,motorcoach,omnibus,passenger vehicle +56:car,automobile,cars +57:caravan +58:motorcycle,motorcycles +59:on rails +60:other vehicle,vehicle +61:trailer +62:truck,motortruck +63:wheeled slow +64:car mount +65:ego vehicle \ No newline at end of file diff --git a/mask_adapter/data/datasets/openseg_classes.py b/mask_adapter/data/datasets/openseg_classes.py new file mode 100644 index 0000000000000000000000000000000000000000..dbc58b086e62e3929de9ded6c6f79d2c3cdb848c --- /dev/null +++ b/mask_adapter/data/datasets/openseg_classes.py @@ -0,0 +1,2492 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import copy +COCO_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, + {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, + {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, + {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, + {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, + {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, + {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, + {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, + {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, + {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, + {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, + {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, + {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, + {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, + {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, + {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, + {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, + {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, + {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, + {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, + {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, + {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, + {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, + {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, + {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, + {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, + {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, + {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, + {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, + {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, + {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, + {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, + {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, + {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, + {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, + {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, + {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, + {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, + {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, + {"color": [197, 226, 255], "isthing": 1, "id": 44, "name": "bottle"}, + {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, + {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, + {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, + {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, + {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, + {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, + {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, + {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, + {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, + {"color": [255, 109, 65], "isthing": 1, 
"id": 55, "name": "orange"}, + {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, + {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, + {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, + {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, + {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, + {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, + {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, + {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, + {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, + {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, + {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, + {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, + {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, + {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, + {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, + {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, + {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, + {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, + {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, + {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, + {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, + {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, + {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, + {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, + {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, + {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, + {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, + {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, + {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, + {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, + {"color": [255, 255, 128], "isthing": 0, "id": 92, "name": "banner"}, + {"color": [147, 211, 203], "isthing": 0, "id": 93, "name": "blanket"}, + {"color": [150, 100, 100], "isthing": 0, "id": 95, "name": "bridge"}, + {"color": [168, 171, 172], "isthing": 0, "id": 100, "name": "cardboard"}, + {"color": [146, 112, 198], "isthing": 0, "id": 107, "name": "counter"}, + {"color": [210, 170, 100], "isthing": 0, "id": 109, "name": "curtain"}, + {"color": [92, 136, 89], "isthing": 0, "id": 112, "name": "door-stuff"}, + {"color": [218, 88, 184], "isthing": 0, "id": 118, "name": "floor-wood"}, + {"color": [241, 129, 0], "isthing": 0, "id": 119, "name": "flower"}, + {"color": [217, 17, 255], "isthing": 0, "id": 122, "name": "fruit"}, + {"color": [124, 74, 181], "isthing": 0, "id": 125, "name": "gravel"}, + {"color": [70, 70, 70], "isthing": 0, "id": 128, "name": "house"}, + {"color": [255, 228, 255], "isthing": 0, "id": 130, "name": "light"}, + {"color": [154, 208, 0], "isthing": 0, "id": 133, "name": "mirror-stuff"}, + {"color": [193, 0, 92], "isthing": 0, "id": 138, "name": "net"}, + {"color": [76, 91, 113], "isthing": 0, "id": 141, "name": "pillow"}, + {"color": [255, 180, 195], "isthing": 0, "id": 144, "name": "platform"}, + {"color": [106, 154, 176], "isthing": 0, "id": 145, "name": "playingfield"}, + {"color": [230, 150, 140], "isthing": 0, "id": 147, "name": "railroad"}, 
+ {"color": [60, 143, 255], "isthing": 0, "id": 148, "name": "river"}, + {"color": [128, 64, 128], "isthing": 0, "id": 149, "name": "road"}, + {"color": [92, 82, 55], "isthing": 0, "id": 151, "name": "roof"}, + {"color": [254, 212, 124], "isthing": 0, "id": 154, "name": "sand"}, + {"color": [73, 77, 174], "isthing": 0, "id": 155, "name": "sea"}, + {"color": [255, 160, 98], "isthing": 0, "id": 156, "name": "shelf"}, + {"color": [255, 255, 255], "isthing": 0, "id": 159, "name": "snow"}, + {"color": [104, 84, 109], "isthing": 0, "id": 161, "name": "stairs"}, + {"color": [169, 164, 131], "isthing": 0, "id": 166, "name": "tent"}, + {"color": [225, 199, 255], "isthing": 0, "id": 168, "name": "towel"}, + {"color": [137, 54, 74], "isthing": 0, "id": 171, "name": "wall-brick"}, + {"color": [135, 158, 223], "isthing": 0, "id": 175, "name": "wall-stone"}, + {"color": [7, 246, 231], "isthing": 0, "id": 176, "name": "wall-tile"}, + {"color": [107, 255, 200], "isthing": 0, "id": 177, "name": "wall-wood"}, + {"color": [58, 41, 149], "isthing": 0, "id": 178, "name": "water-other"}, + {"color": [183, 121, 142], "isthing": 0, "id": 180, "name": "window-blind"}, + {"color": [255, 73, 97], "isthing": 0, "id": 181, "name": "window-other"}, + {"color": [107, 142, 35], "isthing": 0, "id": 184, "name": "tree-merged"}, + {"color": [190, 153, 153], "isthing": 0, "id": 185, "name": "fence-merged"}, + {"color": [146, 139, 141], "isthing": 0, "id": 186, "name": "ceiling-merged"}, + {"color": [70, 130, 180], "isthing": 0, "id": 187, "name": "sky-other-merged"}, + {"color": [134, 199, 156], "isthing": 0, "id": 188, "name": "cabinet-merged"}, + {"color": [209, 226, 140], "isthing": 0, "id": 189, "name": "table-merged"}, + {"color": [96, 36, 108], "isthing": 0, "id": 190, "name": "floor-other-merged"}, + {"color": [96, 96, 96], "isthing": 0, "id": 191, "name": "pavement-merged"}, + {"color": [64, 170, 64], "isthing": 0, "id": 192, "name": "mountain-merged"}, + {"color": [152, 251, 152], "isthing": 0, "id": 193, "name": "grass-merged"}, + {"color": [208, 229, 228], "isthing": 0, "id": 194, "name": "dirt-merged"}, + {"color": [206, 186, 171], "isthing": 0, "id": 195, "name": "paper-merged"}, + {"color": [152, 161, 64], "isthing": 0, "id": 196, "name": "food-other-merged"}, + {"color": [116, 112, 0], "isthing": 0, "id": 197, "name": "building-other-merged"}, + {"color": [0, 114, 143], "isthing": 0, "id": 198, "name": "rock-merged"}, + {"color": [102, 102, 156], "isthing": 0, "id": 199, "name": "wall-other-merged"}, + {"color": [250, 141, 255], "isthing": 0, "id": 200, "name": "rug-merged"}, +] + +ADE20K_150_CATEGORIES = [ + {"color": [120, 120, 120], "id": 0, "isthing": 0, "name": "wall"}, + {"color": [180, 120, 120], "id": 1, "isthing": 0, "name": "building"}, + {"color": [6, 230, 230], "id": 2, "isthing": 0, "name": "sky"}, + {"color": [80, 50, 50], "id": 3, "isthing": 0, "name": "floor"}, + {"color": [4, 200, 3], "id": 4, "isthing": 0, "name": "tree"}, + {"color": [120, 120, 80], "id": 5, "isthing": 0, "name": "ceiling"}, + {"color": [140, 140, 140], "id": 6, "isthing": 0, "name": "road, route"}, + {"color": [204, 5, 255], "id": 7, "isthing": 1, "name": "bed"}, + {"color": [230, 230, 230], "id": 8, "isthing": 1, "name": "window "}, + {"color": [4, 250, 7], "id": 9, "isthing": 0, "name": "grass"}, + {"color": [224, 5, 255], "id": 10, "isthing": 1, "name": "cabinet"}, + {"color": [235, 255, 7], "id": 11, "isthing": 0, "name": "sidewalk, pavement"}, + {"color": [150, 5, 61], "id": 12, "isthing": 1, "name": "person"}, + 
{"color": [120, 120, 70], "id": 13, "isthing": 0, "name": "earth, ground"}, + {"color": [8, 255, 51], "id": 14, "isthing": 1, "name": "door"}, + {"color": [255, 6, 82], "id": 15, "isthing": 1, "name": "table"}, + {"color": [143, 255, 140], "id": 16, "isthing": 0, "name": "mountain, mount"}, + {"color": [204, 255, 4], "id": 17, "isthing": 0, "name": "plant"}, + {"color": [255, 51, 7], "id": 18, "isthing": 1, "name": "curtain"}, + {"color": [204, 70, 3], "id": 19, "isthing": 1, "name": "chair"}, + {"color": [0, 102, 200], "id": 20, "isthing": 1, "name": "car"}, + {"color": [61, 230, 250], "id": 21, "isthing": 0, "name": "water"}, + {"color": [255, 6, 51], "id": 22, "isthing": 1, "name": "painting, picture"}, + {"color": [11, 102, 255], "id": 23, "isthing": 1, "name": "sofa"}, + {"color": [255, 7, 71], "id": 24, "isthing": 1, "name": "shelf"}, + {"color": [255, 9, 224], "id": 25, "isthing": 0, "name": "house"}, + {"color": [9, 7, 230], "id": 26, "isthing": 0, "name": "sea"}, + {"color": [220, 220, 220], "id": 27, "isthing": 1, "name": "mirror"}, + {"color": [255, 9, 92], "id": 28, "isthing": 0, "name": "rug"}, + {"color": [112, 9, 255], "id": 29, "isthing": 0, "name": "field"}, + {"color": [8, 255, 214], "id": 30, "isthing": 1, "name": "armchair"}, + {"color": [7, 255, 224], "id": 31, "isthing": 1, "name": "seat"}, + {"color": [255, 184, 6], "id": 32, "isthing": 1, "name": "fence"}, + {"color": [10, 255, 71], "id": 33, "isthing": 1, "name": "desk"}, + {"color": [255, 41, 10], "id": 34, "isthing": 0, "name": "rock, stone"}, + {"color": [7, 255, 255], "id": 35, "isthing": 1, "name": "wardrobe, closet, press"}, + {"color": [224, 255, 8], "id": 36, "isthing": 1, "name": "lamp"}, + {"color": [102, 8, 255], "id": 37, "isthing": 1, "name": "tub"}, + {"color": [255, 61, 6], "id": 38, "isthing": 1, "name": "rail"}, + {"color": [255, 194, 7], "id": 39, "isthing": 1, "name": "cushion"}, + {"color": [255, 122, 8], "id": 40, "isthing": 0, "name": "base, pedestal, stand"}, + {"color": [0, 255, 20], "id": 41, "isthing": 1, "name": "box"}, + {"color": [255, 8, 41], "id": 42, "isthing": 1, "name": "column, pillar"}, + {"color": [255, 5, 153], "id": 43, "isthing": 1, "name": "signboard, sign"}, + { + "color": [6, 51, 255], + "id": 44, + "isthing": 1, + "name": "chest of drawers, chest, bureau, dresser", + }, + {"color": [235, 12, 255], "id": 45, "isthing": 1, "name": "counter"}, + {"color": [160, 150, 20], "id": 46, "isthing": 0, "name": "sand"}, + {"color": [0, 163, 255], "id": 47, "isthing": 1, "name": "sink"}, + {"color": [140, 140, 140], "id": 48, "isthing": 0, "name": "skyscraper"}, + {"color": [250, 10, 15], "id": 49, "isthing": 1, "name": "fireplace"}, + {"color": [20, 255, 0], "id": 50, "isthing": 1, "name": "refrigerator, icebox"}, + {"color": [31, 255, 0], "id": 51, "isthing": 0, "name": "grandstand, covered stand"}, + {"color": [255, 31, 0], "id": 52, "isthing": 0, "name": "path"}, + {"color": [255, 224, 0], "id": 53, "isthing": 1, "name": "stairs"}, + {"color": [153, 255, 0], "id": 54, "isthing": 0, "name": "runway"}, + {"color": [0, 0, 255], "id": 55, "isthing": 1, "name": "case, display case, showcase, vitrine"}, + { + "color": [255, 71, 0], + "id": 56, + "isthing": 1, + "name": "pool table, billiard table, snooker table", + }, + {"color": [0, 235, 255], "id": 57, "isthing": 1, "name": "pillow"}, + {"color": [0, 173, 255], "id": 58, "isthing": 1, "name": "screen door, screen"}, + {"color": [31, 0, 255], "id": 59, "isthing": 0, "name": "stairway, staircase"}, + {"color": [11, 200, 200], "id": 60, 
"isthing": 0, "name": "river"}, + {"color": [255, 82, 0], "id": 61, "isthing": 0, "name": "bridge, span"}, + {"color": [0, 255, 245], "id": 62, "isthing": 1, "name": "bookcase"}, + {"color": [0, 61, 255], "id": 63, "isthing": 0, "name": "blind, screen"}, + {"color": [0, 255, 112], "id": 64, "isthing": 1, "name": "coffee table"}, + { + "color": [0, 255, 133], + "id": 65, + "isthing": 1, + "name": "toilet, can, commode, crapper, pot, potty, stool, throne", + }, + {"color": [255, 0, 0], "id": 66, "isthing": 1, "name": "flower"}, + {"color": [255, 163, 0], "id": 67, "isthing": 1, "name": "book"}, + {"color": [255, 102, 0], "id": 68, "isthing": 0, "name": "hill"}, + {"color": [194, 255, 0], "id": 69, "isthing": 1, "name": "bench"}, + {"color": [0, 143, 255], "id": 70, "isthing": 1, "name": "countertop"}, + {"color": [51, 255, 0], "id": 71, "isthing": 1, "name": "stove"}, + {"color": [0, 82, 255], "id": 72, "isthing": 1, "name": "palm, palm tree"}, + {"color": [0, 255, 41], "id": 73, "isthing": 1, "name": "kitchen island"}, + {"color": [0, 255, 173], "id": 74, "isthing": 1, "name": "computer"}, + {"color": [10, 0, 255], "id": 75, "isthing": 1, "name": "swivel chair"}, + {"color": [173, 255, 0], "id": 76, "isthing": 1, "name": "boat"}, + {"color": [0, 255, 153], "id": 77, "isthing": 0, "name": "bar"}, + {"color": [255, 92, 0], "id": 78, "isthing": 1, "name": "arcade machine"}, + {"color": [255, 0, 255], "id": 79, "isthing": 0, "name": "hovel, hut, hutch, shack, shanty"}, + {"color": [255, 0, 245], "id": 80, "isthing": 1, "name": "bus"}, + {"color": [255, 0, 102], "id": 81, "isthing": 1, "name": "towel"}, + {"color": [255, 173, 0], "id": 82, "isthing": 1, "name": "light"}, + {"color": [255, 0, 20], "id": 83, "isthing": 1, "name": "truck"}, + {"color": [255, 184, 184], "id": 84, "isthing": 0, "name": "tower"}, + {"color": [0, 31, 255], "id": 85, "isthing": 1, "name": "chandelier"}, + {"color": [0, 255, 61], "id": 86, "isthing": 1, "name": "awning, sunshade, sunblind"}, + {"color": [0, 71, 255], "id": 87, "isthing": 1, "name": "street lamp"}, + {"color": [255, 0, 204], "id": 88, "isthing": 1, "name": "booth"}, + {"color": [0, 255, 194], "id": 89, "isthing": 1, "name": "tv"}, + {"color": [0, 255, 82], "id": 90, "isthing": 1, "name": "plane"}, + {"color": [0, 10, 255], "id": 91, "isthing": 0, "name": "dirt track"}, + {"color": [0, 112, 255], "id": 92, "isthing": 1, "name": "clothes"}, + {"color": [51, 0, 255], "id": 93, "isthing": 1, "name": "pole"}, + {"color": [0, 194, 255], "id": 94, "isthing": 0, "name": "land, ground, soil"}, + { + "color": [0, 122, 255], + "id": 95, + "isthing": 1, + "name": "bannister, banister, balustrade, balusters, handrail", + }, + { + "color": [0, 255, 163], + "id": 96, + "isthing": 0, + "name": "escalator, moving staircase, moving stairway", + }, + { + "color": [255, 153, 0], + "id": 97, + "isthing": 1, + "name": "ottoman, pouf, pouffe, puff, hassock", + }, + {"color": [0, 255, 10], "id": 98, "isthing": 1, "name": "bottle"}, + {"color": [255, 112, 0], "id": 99, "isthing": 0, "name": "buffet, counter, sideboard"}, + { + "color": [143, 255, 0], + "id": 100, + "isthing": 0, + "name": "poster, posting, placard, notice, bill, card", + }, + {"color": [82, 0, 255], "id": 101, "isthing": 0, "name": "stage"}, + {"color": [163, 255, 0], "id": 102, "isthing": 1, "name": "van"}, + {"color": [255, 235, 0], "id": 103, "isthing": 1, "name": "ship"}, + {"color": [8, 184, 170], "id": 104, "isthing": 1, "name": "fountain"}, + { + "color": [133, 0, 255], + "id": 105, + "isthing": 0, + 
"name": "conveyer belt, conveyor belt, conveyer, conveyor, transporter", + }, + {"color": [0, 255, 92], "id": 106, "isthing": 0, "name": "canopy"}, + { + "color": [184, 0, 255], + "id": 107, + "isthing": 1, + "name": "washer, automatic washer, washing machine", + }, + {"color": [255, 0, 31], "id": 108, "isthing": 1, "name": "plaything, toy"}, + {"color": [0, 184, 255], "id": 109, "isthing": 0, "name": "pool"}, + {"color": [0, 214, 255], "id": 110, "isthing": 1, "name": "stool"}, + {"color": [255, 0, 112], "id": 111, "isthing": 1, "name": "barrel, cask"}, + {"color": [92, 255, 0], "id": 112, "isthing": 1, "name": "basket, handbasket"}, + {"color": [0, 224, 255], "id": 113, "isthing": 0, "name": "falls"}, + {"color": [112, 224, 255], "id": 114, "isthing": 0, "name": "tent"}, + {"color": [70, 184, 160], "id": 115, "isthing": 1, "name": "bag"}, + {"color": [163, 0, 255], "id": 116, "isthing": 1, "name": "minibike, motorbike"}, + {"color": [153, 0, 255], "id": 117, "isthing": 0, "name": "cradle"}, + {"color": [71, 255, 0], "id": 118, "isthing": 1, "name": "oven"}, + {"color": [255, 0, 163], "id": 119, "isthing": 1, "name": "ball"}, + {"color": [255, 204, 0], "id": 120, "isthing": 1, "name": "food, solid food"}, + {"color": [255, 0, 143], "id": 121, "isthing": 1, "name": "step, stair"}, + {"color": [0, 255, 235], "id": 122, "isthing": 0, "name": "tank, storage tank"}, + {"color": [133, 255, 0], "id": 123, "isthing": 1, "name": "trade name"}, + {"color": [255, 0, 235], "id": 124, "isthing": 1, "name": "microwave"}, + {"color": [245, 0, 255], "id": 125, "isthing": 1, "name": "pot"}, + {"color": [255, 0, 122], "id": 126, "isthing": 1, "name": "animal"}, + {"color": [255, 245, 0], "id": 127, "isthing": 1, "name": "bicycle"}, + {"color": [10, 190, 212], "id": 128, "isthing": 0, "name": "lake"}, + {"color": [214, 255, 0], "id": 129, "isthing": 1, "name": "dishwasher"}, + {"color": [0, 204, 255], "id": 130, "isthing": 1, "name": "screen"}, + {"color": [20, 0, 255], "id": 131, "isthing": 0, "name": "blanket, cover"}, + {"color": [255, 255, 0], "id": 132, "isthing": 1, "name": "sculpture"}, + {"color": [0, 153, 255], "id": 133, "isthing": 1, "name": "hood, exhaust hood"}, + {"color": [0, 41, 255], "id": 134, "isthing": 1, "name": "sconce"}, + {"color": [0, 255, 204], "id": 135, "isthing": 1, "name": "vase"}, + {"color": [41, 0, 255], "id": 136, "isthing": 1, "name": "traffic light"}, + {"color": [41, 255, 0], "id": 137, "isthing": 1, "name": "tray"}, + {"color": [173, 0, 255], "id": 138, "isthing": 1, "name": "trash can"}, + {"color": [0, 245, 255], "id": 139, "isthing": 1, "name": "fan"}, + {"color": [71, 0, 255], "id": 140, "isthing": 0, "name": "pier"}, + {"color": [122, 0, 255], "id": 141, "isthing": 0, "name": "crt screen"}, + {"color": [0, 255, 184], "id": 142, "isthing": 1, "name": "plate"}, + {"color": [0, 92, 255], "id": 143, "isthing": 1, "name": "monitor"}, + {"color": [184, 255, 0], "id": 144, "isthing": 1, "name": "bulletin board"}, + {"color": [0, 133, 255], "id": 145, "isthing": 0, "name": "shower"}, + {"color": [255, 214, 0], "id": 146, "isthing": 1, "name": "radiator"}, + {"color": [25, 194, 194], "id": 147, "isthing": 1, "name": "glass, drinking glass"}, + {"color": [102, 255, 0], "id": 148, "isthing": 1, "name": "clock"}, + {"color": [92, 0, 255], "id": 149, "isthing": 1, "name": "flag"}, +] + +CITYSCAPES_CATEGORIES = [ + {"color": (128, 64, 128), "isthing": 0, "id": 7, "trainId": 0, "name": "road"}, + {"color": (244, 35, 232), "isthing": 0, "id": 8, "trainId": 1, "name": "sidewalk"}, 
+ {"color": (70, 70, 70), "isthing": 0, "id": 11, "trainId": 2, "name": "building"}, + {"color": (102, 102, 156), "isthing": 0, "id": 12, "trainId": 3, "name": "wall"}, + {"color": (190, 153, 153), "isthing": 0, "id": 13, "trainId": 4, "name": "fence"}, + {"color": (153, 153, 153), "isthing": 0, "id": 17, "trainId": 5, "name": "pole"}, + {"color": (250, 170, 30), "isthing": 0, "id": 19, "trainId": 6, "name": "traffic light"}, + {"color": (220, 220, 0), "isthing": 0, "id": 20, "trainId": 7, "name": "traffic sign"}, + {"color": (107, 142, 35), "isthing": 0, "id": 21, "trainId": 8, "name": "vegetation"}, + {"color": (152, 251, 152), "isthing": 0, "id": 22, "trainId": 9, "name": "terrain"}, + {"color": (70, 130, 180), "isthing": 0, "id": 23, "trainId": 10, "name": "sky"}, + {"color": (220, 20, 60), "isthing": 1, "id": 24, "trainId": 11, "name": "person"}, + {"color": (255, 0, 0), "isthing": 1, "id": 25, "trainId": 12, "name": "rider"}, + {"color": (0, 0, 142), "isthing": 1, "id": 26, "trainId": 13, "name": "car"}, + {"color": (0, 0, 70), "isthing": 1, "id": 27, "trainId": 14, "name": "truck"}, + {"color": (0, 60, 100), "isthing": 1, "id": 28, "trainId": 15, "name": "bus"}, + {"color": (0, 80, 100), "isthing": 1, "id": 31, "trainId": 16, "name": "train"}, + {"color": (0, 0, 230), "isthing": 1, "id": 32, "trainId": 17, "name": "motorcycle"}, + {"color": (119, 11, 32), "isthing": 1, "id": 33, "trainId": 18, "name": "bicycle"}, +] + +ADE20K_847_CATEGORIES = [ + {"name": "wall", "id": 2978, "trainId": 0}, + {"name": "building, edifice", "id": 312, "trainId": 1}, + {"name": "sky", "id": 2420, "trainId": 2}, + {"name": "tree", "id": 2855, "trainId": 3}, + {"name": "road, route", "id": 2131, "trainId": 4}, + {"name": "floor, flooring", "id": 976, "trainId": 5}, + {"name": "ceiling", "id": 447, "trainId": 6}, + {"name": "bed", "id": 165, "trainId": 7}, + {"name": "sidewalk, pavement", "id": 2377, "trainId": 8}, + {"name": "earth, ground", "id": 838, "trainId": 9}, + {"name": "cabinet", "id": 350, "trainId": 10}, + {"name": "person, individual, someone, somebody, mortal, soul", "id": 1831, "trainId": 11}, + {"name": "grass", "id": 1125, "trainId": 12}, + {"name": "windowpane, window", "id": 3055, "trainId": 13}, + {"name": "car, auto, automobile, machine, motorcar", "id": 401, "trainId": 14}, + {"name": "mountain, mount", "id": 1610, "trainId": 15}, + {"name": "plant, flora, plant life", "id": 1910, "trainId": 16}, + {"name": "table", "id": 2684, "trainId": 17}, + {"name": "chair", "id": 471, "trainId": 18}, + {"name": "curtain, drape, drapery, mantle, pall", "id": 687, "trainId": 19}, + {"name": "door", "id": 774, "trainId": 20}, + {"name": "sofa, couch, lounge", "id": 2473, "trainId": 21}, + {"name": "sea", "id": 2264, "trainId": 22}, + {"name": "painting, picture", "id": 1735, "trainId": 23}, + {"name": "water", "id": 2994, "trainId": 24}, + {"name": "mirror", "id": 1564, "trainId": 25}, + {"name": "house", "id": 1276, "trainId": 26}, + {"name": "rug, carpet, carpeting", "id": 2178, "trainId": 27}, + {"name": "shelf", "id": 2329, "trainId": 28}, + {"name": "armchair", "id": 57, "trainId": 29}, + {"name": "fence, fencing", "id": 907, "trainId": 30}, + {"name": "field", "id": 913, "trainId": 31}, + {"name": "lamp", "id": 1395, "trainId": 32}, + {"name": "rock, stone", "id": 2138, "trainId": 33}, + {"name": "seat", "id": 2272, "trainId": 34}, + {"name": "river", "id": 2128, "trainId": 35}, + {"name": "desk", "id": 724, "trainId": 36}, + {"name": "bathtub, bathing tub, bath, tub", "id": 155, "trainId": 
37}, + {"name": "railing, rail", "id": 2053, "trainId": 38}, + {"name": "signboard, sign", "id": 2380, "trainId": 39}, + {"name": "cushion", "id": 689, "trainId": 40}, + {"name": "path", "id": 1788, "trainId": 41}, + {"name": "work surface", "id": 3087, "trainId": 42}, + {"name": "stairs, steps", "id": 2530, "trainId": 43}, + {"name": "column, pillar", "id": 581, "trainId": 44}, + {"name": "sink", "id": 2388, "trainId": 45}, + {"name": "wardrobe, closet, press", "id": 2985, "trainId": 46}, + {"name": "snow", "id": 2454, "trainId": 47}, + {"name": "refrigerator, icebox", "id": 2096, "trainId": 48}, + {"name": "base, pedestal, stand", "id": 137, "trainId": 49}, + {"name": "bridge, span", "id": 294, "trainId": 50}, + {"name": "blind, screen", "id": 212, "trainId": 51}, + {"name": "runway", "id": 2185, "trainId": 52}, + {"name": "cliff, drop, drop-off", "id": 524, "trainId": 53}, + {"name": "sand", "id": 2212, "trainId": 54}, + {"name": "fireplace, hearth, open fireplace", "id": 943, "trainId": 55}, + {"name": "pillow", "id": 1869, "trainId": 56}, + {"name": "screen door, screen", "id": 2251, "trainId": 57}, + {"name": "toilet, can, commode, crapper, pot, potty, stool, throne", "id": 2793, "trainId": 58}, + {"name": "skyscraper", "id": 2423, "trainId": 59}, + {"name": "grandstand, covered stand", "id": 1121, "trainId": 60}, + {"name": "box", "id": 266, "trainId": 61}, + {"name": "pool table, billiard table, snooker table", "id": 1948, "trainId": 62}, + {"name": "palm, palm tree", "id": 1744, "trainId": 63}, + {"name": "double door", "id": 783, "trainId": 64}, + {"name": "coffee table, cocktail table", "id": 571, "trainId": 65}, + {"name": "counter", "id": 627, "trainId": 66}, + {"name": "countertop", "id": 629, "trainId": 67}, + {"name": "chest of drawers, chest, bureau, dresser", "id": 491, "trainId": 68}, + {"name": "kitchen island", "id": 1374, "trainId": 69}, + {"name": "boat", "id": 223, "trainId": 70}, + {"name": "waterfall, falls", "id": 3016, "trainId": 71}, + { + "name": "stove, kitchen stove, range, kitchen range, cooking stove", + "id": 2598, + "trainId": 72, + }, + {"name": "flower", "id": 978, "trainId": 73}, + {"name": "bookcase", "id": 239, "trainId": 74}, + {"name": "controls", "id": 608, "trainId": 75}, + {"name": "book", "id": 236, "trainId": 76}, + {"name": "stairway, staircase", "id": 2531, "trainId": 77}, + {"name": "streetlight, street lamp", "id": 2616, "trainId": 78}, + { + "name": "computer, computing machine, computing device, data processor, electronic computer, information processing system", + "id": 591, + "trainId": 79, + }, + { + "name": "bus, autobus, coach, charabanc, double-decker, jitney, motorbus, motorcoach, omnibus, passenger vehicle", + "id": 327, + "trainId": 80, + }, + {"name": "swivel chair", "id": 2679, "trainId": 81}, + {"name": "light, light source", "id": 1451, "trainId": 82}, + {"name": "bench", "id": 181, "trainId": 83}, + {"name": "case, display case, showcase, vitrine", "id": 420, "trainId": 84}, + {"name": "towel", "id": 2821, "trainId": 85}, + {"name": "fountain", "id": 1023, "trainId": 86}, + {"name": "embankment", "id": 855, "trainId": 87}, + { + "name": "television receiver, television, television set, tv, tv set, idiot box, boob tube, telly, goggle box", + "id": 2733, + "trainId": 88, + }, + {"name": "van", "id": 2928, "trainId": 89}, + {"name": "hill", "id": 1240, "trainId": 90}, + {"name": "awning, sunshade, sunblind", "id": 77, "trainId": 91}, + {"name": "poster, posting, placard, notice, bill, card", "id": 1969, "trainId": 92}, + 
{"name": "truck, motortruck", "id": 2880, "trainId": 93}, + {"name": "airplane, aeroplane, plane", "id": 14, "trainId": 94}, + {"name": "pole", "id": 1936, "trainId": 95}, + {"name": "tower", "id": 2828, "trainId": 96}, + {"name": "court", "id": 631, "trainId": 97}, + {"name": "ball", "id": 103, "trainId": 98}, + { + "name": "aircraft carrier, carrier, flattop, attack aircraft carrier", + "id": 3144, + "trainId": 99, + }, + {"name": "buffet, counter, sideboard", "id": 308, "trainId": 100}, + {"name": "hovel, hut, hutch, shack, shanty", "id": 1282, "trainId": 101}, + {"name": "apparel, wearing apparel, dress, clothes", "id": 38, "trainId": 102}, + {"name": "minibike, motorbike", "id": 1563, "trainId": 103}, + {"name": "animal, animate being, beast, brute, creature, fauna", "id": 29, "trainId": 104}, + {"name": "chandelier, pendant, pendent", "id": 480, "trainId": 105}, + {"name": "step, stair", "id": 2569, "trainId": 106}, + {"name": "booth, cubicle, stall, kiosk", "id": 247, "trainId": 107}, + {"name": "bicycle, bike, wheel, cycle", "id": 187, "trainId": 108}, + {"name": "doorframe, doorcase", "id": 778, "trainId": 109}, + {"name": "sconce", "id": 2243, "trainId": 110}, + {"name": "pond", "id": 1941, "trainId": 111}, + {"name": "trade name, brand name, brand, marque", "id": 2833, "trainId": 112}, + {"name": "bannister, banister, balustrade, balusters, handrail", "id": 120, "trainId": 113}, + {"name": "bag", "id": 95, "trainId": 114}, + {"name": "traffic light, traffic signal, stoplight", "id": 2836, "trainId": 115}, + {"name": "gazebo", "id": 1087, "trainId": 116}, + {"name": "escalator, moving staircase, moving stairway", "id": 868, "trainId": 117}, + {"name": "land, ground, soil", "id": 1401, "trainId": 118}, + {"name": "board, plank", "id": 220, "trainId": 119}, + {"name": "arcade machine", "id": 47, "trainId": 120}, + {"name": "eiderdown, duvet, continental quilt", "id": 843, "trainId": 121}, + {"name": "bar", "id": 123, "trainId": 122}, + {"name": "stall, stand, sales booth", "id": 2537, "trainId": 123}, + {"name": "playground", "id": 1927, "trainId": 124}, + {"name": "ship", "id": 2337, "trainId": 125}, + {"name": "ottoman, pouf, pouffe, puff, hassock", "id": 1702, "trainId": 126}, + { + "name": "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin", + "id": 64, + "trainId": 127, + }, + {"name": "bottle", "id": 249, "trainId": 128}, + {"name": "cradle", "id": 642, "trainId": 129}, + {"name": "pot, flowerpot", "id": 1981, "trainId": 130}, + { + "name": "conveyer belt, conveyor belt, conveyer, conveyor, transporter", + "id": 609, + "trainId": 131, + }, + {"name": "train, railroad train", "id": 2840, "trainId": 132}, + {"name": "stool", "id": 2586, "trainId": 133}, + {"name": "lake", "id": 1393, "trainId": 134}, + {"name": "tank, storage tank", "id": 2704, "trainId": 135}, + {"name": "ice, water ice", "id": 1304, "trainId": 136}, + {"name": "basket, handbasket", "id": 146, "trainId": 137}, + {"name": "manhole", "id": 1494, "trainId": 138}, + {"name": "tent, collapsible shelter", "id": 2739, "trainId": 139}, + {"name": "canopy", "id": 389, "trainId": 140}, + {"name": "microwave, microwave oven", "id": 1551, "trainId": 141}, + {"name": "barrel, cask", "id": 131, "trainId": 142}, + {"name": "dirt track", "id": 738, "trainId": 143}, + {"name": "beam", "id": 161, "trainId": 144}, + {"name": "dishwasher, dish washer, dishwashing machine", "id": 747, "trainId": 145}, + {"name": "plate", "id": 1919, "trainId": 146}, + {"name": "screen, crt 
screen", "id": 3109, "trainId": 147}, + {"name": "ruins", "id": 2179, "trainId": 148}, + {"name": "washer, automatic washer, washing machine", "id": 2989, "trainId": 149}, + {"name": "blanket, cover", "id": 206, "trainId": 150}, + {"name": "plaything, toy", "id": 1930, "trainId": 151}, + {"name": "food, solid food", "id": 1002, "trainId": 152}, + {"name": "screen, silver screen, projection screen", "id": 2254, "trainId": 153}, + {"name": "oven", "id": 1708, "trainId": 154}, + {"name": "stage", "id": 2526, "trainId": 155}, + {"name": "beacon, lighthouse, beacon light, pharos", "id": 160, "trainId": 156}, + {"name": "umbrella", "id": 2901, "trainId": 157}, + {"name": "sculpture", "id": 2262, "trainId": 158}, + {"name": "aqueduct", "id": 44, "trainId": 159}, + {"name": "container", "id": 597, "trainId": 160}, + {"name": "scaffolding, staging", "id": 2235, "trainId": 161}, + {"name": "hood, exhaust hood", "id": 1260, "trainId": 162}, + {"name": "curb, curbing, kerb", "id": 682, "trainId": 163}, + {"name": "roller coaster", "id": 2151, "trainId": 164}, + {"name": "horse, equus caballus", "id": 3107, "trainId": 165}, + {"name": "catwalk", "id": 432, "trainId": 166}, + {"name": "glass, drinking glass", "id": 1098, "trainId": 167}, + {"name": "vase", "id": 2932, "trainId": 168}, + {"name": "central reservation", "id": 461, "trainId": 169}, + {"name": "carousel", "id": 410, "trainId": 170}, + {"name": "radiator", "id": 2046, "trainId": 171}, + {"name": "closet", "id": 533, "trainId": 172}, + {"name": "machine", "id": 1481, "trainId": 173}, + {"name": "pier, wharf, wharfage, dock", "id": 1858, "trainId": 174}, + {"name": "fan", "id": 894, "trainId": 175}, + {"name": "inflatable bounce game", "id": 1322, "trainId": 176}, + {"name": "pitch", "id": 1891, "trainId": 177}, + {"name": "paper", "id": 1756, "trainId": 178}, + {"name": "arcade, colonnade", "id": 49, "trainId": 179}, + {"name": "hot tub", "id": 1272, "trainId": 180}, + {"name": "helicopter", "id": 1229, "trainId": 181}, + {"name": "tray", "id": 2850, "trainId": 182}, + {"name": "partition, divider", "id": 1784, "trainId": 183}, + {"name": "vineyard", "id": 2962, "trainId": 184}, + {"name": "bowl", "id": 259, "trainId": 185}, + {"name": "bullring", "id": 319, "trainId": 186}, + {"name": "flag", "id": 954, "trainId": 187}, + {"name": "pot", "id": 1974, "trainId": 188}, + {"name": "footbridge, overcrossing, pedestrian bridge", "id": 1013, "trainId": 189}, + {"name": "shower", "id": 2356, "trainId": 190}, + {"name": "bag, traveling bag, travelling bag, grip, suitcase", "id": 97, "trainId": 191}, + {"name": "bulletin board, notice board", "id": 318, "trainId": 192}, + {"name": "confessional booth", "id": 592, "trainId": 193}, + {"name": "trunk, tree trunk, bole", "id": 2885, "trainId": 194}, + {"name": "forest", "id": 1017, "trainId": 195}, + {"name": "elevator door", "id": 851, "trainId": 196}, + {"name": "laptop, laptop computer", "id": 1407, "trainId": 197}, + {"name": "instrument panel", "id": 1332, "trainId": 198}, + {"name": "bucket, pail", "id": 303, "trainId": 199}, + {"name": "tapestry, tapis", "id": 2714, "trainId": 200}, + {"name": "platform", "id": 1924, "trainId": 201}, + {"name": "jacket", "id": 1346, "trainId": 202}, + {"name": "gate", "id": 1081, "trainId": 203}, + {"name": "monitor, monitoring device", "id": 1583, "trainId": 204}, + { + "name": "telephone booth, phone booth, call box, telephone box, telephone kiosk", + "id": 2727, + "trainId": 205, + }, + {"name": "spotlight, spot", "id": 2509, "trainId": 206}, + {"name": "ring", 
"id": 2123, "trainId": 207}, + {"name": "control panel", "id": 602, "trainId": 208}, + {"name": "blackboard, chalkboard", "id": 202, "trainId": 209}, + {"name": "air conditioner, air conditioning", "id": 10, "trainId": 210}, + {"name": "chest", "id": 490, "trainId": 211}, + {"name": "clock", "id": 530, "trainId": 212}, + {"name": "sand dune", "id": 2213, "trainId": 213}, + {"name": "pipe, pipage, piping", "id": 1884, "trainId": 214}, + {"name": "vault", "id": 2934, "trainId": 215}, + {"name": "table football", "id": 2687, "trainId": 216}, + {"name": "cannon", "id": 387, "trainId": 217}, + {"name": "swimming pool, swimming bath, natatorium", "id": 2668, "trainId": 218}, + {"name": "fluorescent, fluorescent fixture", "id": 982, "trainId": 219}, + {"name": "statue", "id": 2547, "trainId": 220}, + { + "name": "loudspeaker, speaker, speaker unit, loudspeaker system, speaker system", + "id": 1474, + "trainId": 221, + }, + {"name": "exhibitor", "id": 877, "trainId": 222}, + {"name": "ladder", "id": 1391, "trainId": 223}, + {"name": "carport", "id": 414, "trainId": 224}, + {"name": "dam", "id": 698, "trainId": 225}, + {"name": "pulpit", "id": 2019, "trainId": 226}, + {"name": "skylight, fanlight", "id": 2422, "trainId": 227}, + {"name": "water tower", "id": 3010, "trainId": 228}, + {"name": "grill, grille, grillwork", "id": 1139, "trainId": 229}, + {"name": "display board", "id": 753, "trainId": 230}, + {"name": "pane, pane of glass, window glass", "id": 1747, "trainId": 231}, + {"name": "rubbish, trash, scrap", "id": 2175, "trainId": 232}, + {"name": "ice rink", "id": 1301, "trainId": 233}, + {"name": "fruit", "id": 1033, "trainId": 234}, + {"name": "patio", "id": 1789, "trainId": 235}, + {"name": "vending machine", "id": 2939, "trainId": 236}, + {"name": "telephone, phone, telephone set", "id": 2730, "trainId": 237}, + {"name": "net", "id": 1652, "trainId": 238}, + { + "name": "backpack, back pack, knapsack, packsack, rucksack, haversack", + "id": 90, + "trainId": 239, + }, + {"name": "jar", "id": 1349, "trainId": 240}, + {"name": "track", "id": 2830, "trainId": 241}, + {"name": "magazine", "id": 1485, "trainId": 242}, + {"name": "shutter", "id": 2370, "trainId": 243}, + {"name": "roof", "id": 2155, "trainId": 244}, + {"name": "banner, streamer", "id": 118, "trainId": 245}, + {"name": "landfill", "id": 1402, "trainId": 246}, + {"name": "post", "id": 1957, "trainId": 247}, + {"name": "altarpiece, reredos", "id": 3130, "trainId": 248}, + {"name": "hat, chapeau, lid", "id": 1197, "trainId": 249}, + {"name": "arch, archway", "id": 52, "trainId": 250}, + {"name": "table game", "id": 2688, "trainId": 251}, + {"name": "bag, handbag, pocketbook, purse", "id": 96, "trainId": 252}, + {"name": "document, written document, papers", "id": 762, "trainId": 253}, + {"name": "dome", "id": 772, "trainId": 254}, + {"name": "pier", "id": 1857, "trainId": 255}, + {"name": "shanties", "id": 2315, "trainId": 256}, + {"name": "forecourt", "id": 1016, "trainId": 257}, + {"name": "crane", "id": 643, "trainId": 258}, + {"name": "dog, domestic dog, canis familiaris", "id": 3105, "trainId": 259}, + {"name": "piano, pianoforte, forte-piano", "id": 1849, "trainId": 260}, + {"name": "drawing", "id": 791, "trainId": 261}, + {"name": "cabin", "id": 349, "trainId": 262}, + { + "name": "ad, advertisement, advertizement, advertising, advertizing, advert", + "id": 6, + "trainId": 263, + }, + {"name": "amphitheater, amphitheatre, coliseum", "id": 3114, "trainId": 264}, + {"name": "monument", "id": 1587, "trainId": 265}, + {"name": 
"henhouse", "id": 1233, "trainId": 266}, + {"name": "cockpit", "id": 559, "trainId": 267}, + {"name": "heater, warmer", "id": 1223, "trainId": 268}, + {"name": "windmill, aerogenerator, wind generator", "id": 3049, "trainId": 269}, + {"name": "pool", "id": 1943, "trainId": 270}, + {"name": "elevator, lift", "id": 853, "trainId": 271}, + {"name": "decoration, ornament, ornamentation", "id": 709, "trainId": 272}, + {"name": "labyrinth", "id": 1390, "trainId": 273}, + {"name": "text, textual matter", "id": 2748, "trainId": 274}, + {"name": "printer", "id": 2007, "trainId": 275}, + {"name": "mezzanine, first balcony", "id": 1546, "trainId": 276}, + {"name": "mattress", "id": 1513, "trainId": 277}, + {"name": "straw", "id": 2600, "trainId": 278}, + {"name": "stalls", "id": 2538, "trainId": 279}, + {"name": "patio, terrace", "id": 1790, "trainId": 280}, + {"name": "billboard, hoarding", "id": 194, "trainId": 281}, + {"name": "bus stop", "id": 326, "trainId": 282}, + {"name": "trouser, pant", "id": 2877, "trainId": 283}, + {"name": "console table, console", "id": 594, "trainId": 284}, + {"name": "rack", "id": 2036, "trainId": 285}, + {"name": "notebook", "id": 1662, "trainId": 286}, + {"name": "shrine", "id": 2366, "trainId": 287}, + {"name": "pantry", "id": 1754, "trainId": 288}, + {"name": "cart", "id": 418, "trainId": 289}, + {"name": "steam shovel", "id": 2553, "trainId": 290}, + {"name": "porch", "id": 1951, "trainId": 291}, + {"name": "postbox, mailbox, letter box", "id": 1963, "trainId": 292}, + {"name": "figurine, statuette", "id": 918, "trainId": 293}, + {"name": "recycling bin", "id": 2086, "trainId": 294}, + {"name": "folding screen", "id": 997, "trainId": 295}, + {"name": "telescope", "id": 2731, "trainId": 296}, + {"name": "deck chair, beach chair", "id": 704, "trainId": 297}, + {"name": "kennel", "id": 1365, "trainId": 298}, + {"name": "coffee maker", "id": 569, "trainId": 299}, + {"name": "altar, communion table, lord's table", "id": 3108, "trainId": 300}, + {"name": "fish", "id": 948, "trainId": 301}, + {"name": "easel", "id": 839, "trainId": 302}, + {"name": "artificial golf green", "id": 63, "trainId": 303}, + {"name": "iceberg", "id": 1305, "trainId": 304}, + {"name": "candlestick, candle holder", "id": 378, "trainId": 305}, + {"name": "shower stall, shower bath", "id": 2362, "trainId": 306}, + {"name": "television stand", "id": 2734, "trainId": 307}, + { + "name": "wall socket, wall plug, electric outlet, electrical outlet, outlet, electric receptacle", + "id": 2982, + "trainId": 308, + }, + {"name": "skeleton", "id": 2398, "trainId": 309}, + {"name": "grand piano, grand", "id": 1119, "trainId": 310}, + {"name": "candy, confect", "id": 382, "trainId": 311}, + {"name": "grille door", "id": 1141, "trainId": 312}, + {"name": "pedestal, plinth, footstall", "id": 1805, "trainId": 313}, + {"name": "jersey, t-shirt, tee shirt", "id": 3102, "trainId": 314}, + {"name": "shoe", "id": 2341, "trainId": 315}, + {"name": "gravestone, headstone, tombstone", "id": 1131, "trainId": 316}, + {"name": "shanty", "id": 2316, "trainId": 317}, + {"name": "structure", "id": 2626, "trainId": 318}, + {"name": "rocking chair, rocker", "id": 3104, "trainId": 319}, + {"name": "bird", "id": 198, "trainId": 320}, + {"name": "place mat", "id": 1896, "trainId": 321}, + {"name": "tomb", "id": 2800, "trainId": 322}, + {"name": "big top", "id": 190, "trainId": 323}, + {"name": "gas pump, gasoline pump, petrol pump, island dispenser", "id": 3131, "trainId": 324}, + {"name": "lockers", "id": 1463, "trainId": 325}, 
+ {"name": "cage", "id": 357, "trainId": 326}, + {"name": "finger", "id": 929, "trainId": 327}, + {"name": "bleachers", "id": 209, "trainId": 328}, + {"name": "ferris wheel", "id": 912, "trainId": 329}, + {"name": "hairdresser chair", "id": 1164, "trainId": 330}, + {"name": "mat", "id": 1509, "trainId": 331}, + {"name": "stands", "id": 2539, "trainId": 332}, + {"name": "aquarium, fish tank, marine museum", "id": 3116, "trainId": 333}, + {"name": "streetcar, tram, tramcar, trolley, trolley car", "id": 2615, "trainId": 334}, + {"name": "napkin, table napkin, serviette", "id": 1644, "trainId": 335}, + {"name": "dummy", "id": 818, "trainId": 336}, + {"name": "booklet, brochure, folder, leaflet, pamphlet", "id": 242, "trainId": 337}, + {"name": "sand trap", "id": 2217, "trainId": 338}, + {"name": "shop, store", "id": 2347, "trainId": 339}, + {"name": "table cloth", "id": 2686, "trainId": 340}, + {"name": "service station", "id": 2300, "trainId": 341}, + {"name": "coffin", "id": 572, "trainId": 342}, + {"name": "drawer", "id": 789, "trainId": 343}, + {"name": "cages", "id": 358, "trainId": 344}, + {"name": "slot machine, coin machine", "id": 2443, "trainId": 345}, + {"name": "balcony", "id": 101, "trainId": 346}, + {"name": "volleyball court", "id": 2969, "trainId": 347}, + {"name": "table tennis", "id": 2692, "trainId": 348}, + {"name": "control table", "id": 606, "trainId": 349}, + {"name": "shirt", "id": 2339, "trainId": 350}, + {"name": "merchandise, ware, product", "id": 1533, "trainId": 351}, + {"name": "railway", "id": 2060, "trainId": 352}, + {"name": "parterre", "id": 1782, "trainId": 353}, + {"name": "chimney", "id": 495, "trainId": 354}, + {"name": "can, tin, tin can", "id": 371, "trainId": 355}, + {"name": "tanks", "id": 2707, "trainId": 356}, + {"name": "fabric, cloth, material, textile", "id": 889, "trainId": 357}, + {"name": "alga, algae", "id": 3156, "trainId": 358}, + {"name": "system", "id": 2683, "trainId": 359}, + {"name": "map", "id": 1499, "trainId": 360}, + {"name": "greenhouse", "id": 1135, "trainId": 361}, + {"name": "mug", "id": 1619, "trainId": 362}, + {"name": "barbecue", "id": 125, "trainId": 363}, + {"name": "trailer", "id": 2838, "trainId": 364}, + {"name": "toilet tissue, toilet paper, bathroom tissue", "id": 2792, "trainId": 365}, + {"name": "organ", "id": 1695, "trainId": 366}, + {"name": "dishrag, dishcloth", "id": 746, "trainId": 367}, + {"name": "island", "id": 1343, "trainId": 368}, + {"name": "keyboard", "id": 1370, "trainId": 369}, + {"name": "trench", "id": 2858, "trainId": 370}, + {"name": "basket, basketball hoop, hoop", "id": 145, "trainId": 371}, + {"name": "steering wheel, wheel", "id": 2565, "trainId": 372}, + {"name": "pitcher, ewer", "id": 1892, "trainId": 373}, + {"name": "goal", "id": 1103, "trainId": 374}, + {"name": "bread, breadstuff, staff of life", "id": 286, "trainId": 375}, + {"name": "beds", "id": 170, "trainId": 376}, + {"name": "wood", "id": 3073, "trainId": 377}, + {"name": "file cabinet", "id": 922, "trainId": 378}, + {"name": "newspaper, paper", "id": 1655, "trainId": 379}, + {"name": "motorboat", "id": 1602, "trainId": 380}, + {"name": "rope", "id": 2160, "trainId": 381}, + {"name": "guitar", "id": 1151, "trainId": 382}, + {"name": "rubble", "id": 2176, "trainId": 383}, + {"name": "scarf", "id": 2239, "trainId": 384}, + {"name": "barrels", "id": 132, "trainId": 385}, + {"name": "cap", "id": 394, "trainId": 386}, + {"name": "leaves", "id": 1424, "trainId": 387}, + {"name": "control tower", "id": 607, "trainId": 388}, + {"name": 
"dashboard", "id": 700, "trainId": 389}, + {"name": "bandstand", "id": 116, "trainId": 390}, + {"name": "lectern", "id": 1425, "trainId": 391}, + {"name": "switch, electric switch, electrical switch", "id": 2676, "trainId": 392}, + {"name": "baseboard, mopboard, skirting board", "id": 141, "trainId": 393}, + {"name": "shower room", "id": 2360, "trainId": 394}, + {"name": "smoke", "id": 2449, "trainId": 395}, + {"name": "faucet, spigot", "id": 897, "trainId": 396}, + {"name": "bulldozer", "id": 317, "trainId": 397}, + {"name": "saucepan", "id": 2228, "trainId": 398}, + {"name": "shops", "id": 2351, "trainId": 399}, + {"name": "meter", "id": 1543, "trainId": 400}, + {"name": "crevasse", "id": 656, "trainId": 401}, + {"name": "gear", "id": 1088, "trainId": 402}, + {"name": "candelabrum, candelabra", "id": 373, "trainId": 403}, + {"name": "sofa bed", "id": 2472, "trainId": 404}, + {"name": "tunnel", "id": 2892, "trainId": 405}, + {"name": "pallet", "id": 1740, "trainId": 406}, + {"name": "wire, conducting wire", "id": 3067, "trainId": 407}, + {"name": "kettle, boiler", "id": 1367, "trainId": 408}, + {"name": "bidet", "id": 188, "trainId": 409}, + { + "name": "baby buggy, baby carriage, carriage, perambulator, pram, stroller, go-cart, pushchair, pusher", + "id": 79, + "trainId": 410, + }, + {"name": "music stand", "id": 1633, "trainId": 411}, + {"name": "pipe, tube", "id": 1885, "trainId": 412}, + {"name": "cup", "id": 677, "trainId": 413}, + {"name": "parking meter", "id": 1779, "trainId": 414}, + {"name": "ice hockey rink", "id": 1297, "trainId": 415}, + {"name": "shelter", "id": 2334, "trainId": 416}, + {"name": "weeds", "id": 3027, "trainId": 417}, + {"name": "temple", "id": 2735, "trainId": 418}, + {"name": "patty, cake", "id": 1791, "trainId": 419}, + {"name": "ski slope", "id": 2405, "trainId": 420}, + {"name": "panel", "id": 1748, "trainId": 421}, + {"name": "wallet", "id": 2983, "trainId": 422}, + {"name": "wheel", "id": 3035, "trainId": 423}, + {"name": "towel rack, towel horse", "id": 2824, "trainId": 424}, + {"name": "roundabout", "id": 2168, "trainId": 425}, + {"name": "canister, cannister, tin", "id": 385, "trainId": 426}, + {"name": "rod", "id": 2148, "trainId": 427}, + {"name": "soap dispenser", "id": 2465, "trainId": 428}, + {"name": "bell", "id": 175, "trainId": 429}, + {"name": "canvas", "id": 390, "trainId": 430}, + {"name": "box office, ticket office, ticket booth", "id": 268, "trainId": 431}, + {"name": "teacup", "id": 2722, "trainId": 432}, + {"name": "trellis", "id": 2857, "trainId": 433}, + {"name": "workbench", "id": 3088, "trainId": 434}, + {"name": "valley, vale", "id": 2926, "trainId": 435}, + {"name": "toaster", "id": 2782, "trainId": 436}, + {"name": "knife", "id": 1378, "trainId": 437}, + {"name": "podium", "id": 1934, "trainId": 438}, + {"name": "ramp", "id": 2072, "trainId": 439}, + {"name": "tumble dryer", "id": 2889, "trainId": 440}, + {"name": "fireplug, fire hydrant, plug", "id": 944, "trainId": 441}, + {"name": "gym shoe, sneaker, tennis shoe", "id": 1158, "trainId": 442}, + {"name": "lab bench", "id": 1383, "trainId": 443}, + {"name": "equipment", "id": 867, "trainId": 444}, + {"name": "rocky formation", "id": 2145, "trainId": 445}, + {"name": "plastic", "id": 1915, "trainId": 446}, + {"name": "calendar", "id": 361, "trainId": 447}, + {"name": "caravan", "id": 402, "trainId": 448}, + {"name": "check-in-desk", "id": 482, "trainId": 449}, + {"name": "ticket counter", "id": 2761, "trainId": 450}, + {"name": "brush", "id": 300, "trainId": 451}, + {"name": 
"mill", "id": 1554, "trainId": 452}, + {"name": "covered bridge", "id": 636, "trainId": 453}, + {"name": "bowling alley", "id": 260, "trainId": 454}, + {"name": "hanger", "id": 1186, "trainId": 455}, + {"name": "excavator", "id": 871, "trainId": 456}, + {"name": "trestle", "id": 2859, "trainId": 457}, + {"name": "revolving door", "id": 2103, "trainId": 458}, + {"name": "blast furnace", "id": 208, "trainId": 459}, + {"name": "scale, weighing machine", "id": 2236, "trainId": 460}, + {"name": "projector", "id": 2012, "trainId": 461}, + {"name": "soap", "id": 2462, "trainId": 462}, + {"name": "locker", "id": 1462, "trainId": 463}, + {"name": "tractor", "id": 2832, "trainId": 464}, + {"name": "stretcher", "id": 2617, "trainId": 465}, + {"name": "frame", "id": 1024, "trainId": 466}, + {"name": "grating", "id": 1129, "trainId": 467}, + {"name": "alembic", "id": 18, "trainId": 468}, + {"name": "candle, taper, wax light", "id": 376, "trainId": 469}, + {"name": "barrier", "id": 134, "trainId": 470}, + {"name": "cardboard", "id": 407, "trainId": 471}, + {"name": "cave", "id": 434, "trainId": 472}, + {"name": "puddle", "id": 2017, "trainId": 473}, + {"name": "tarp", "id": 2717, "trainId": 474}, + {"name": "price tag", "id": 2005, "trainId": 475}, + {"name": "watchtower", "id": 2993, "trainId": 476}, + {"name": "meters", "id": 1545, "trainId": 477}, + { + "name": "light bulb, lightbulb, bulb, incandescent lamp, electric light, electric-light bulb", + "id": 1445, + "trainId": 478, + }, + {"name": "tracks", "id": 2831, "trainId": 479}, + {"name": "hair dryer", "id": 1161, "trainId": 480}, + {"name": "skirt", "id": 2411, "trainId": 481}, + {"name": "viaduct", "id": 2949, "trainId": 482}, + {"name": "paper towel", "id": 1769, "trainId": 483}, + {"name": "coat", "id": 552, "trainId": 484}, + {"name": "sheet", "id": 2327, "trainId": 485}, + {"name": "fire extinguisher, extinguisher, asphyxiator", "id": 939, "trainId": 486}, + {"name": "water wheel", "id": 3013, "trainId": 487}, + {"name": "pottery, clayware", "id": 1986, "trainId": 488}, + {"name": "magazine rack", "id": 1486, "trainId": 489}, + {"name": "teapot", "id": 2723, "trainId": 490}, + {"name": "microphone, mike", "id": 1549, "trainId": 491}, + {"name": "support", "id": 2649, "trainId": 492}, + {"name": "forklift", "id": 1020, "trainId": 493}, + {"name": "canyon", "id": 392, "trainId": 494}, + {"name": "cash register, register", "id": 422, "trainId": 495}, + {"name": "leaf, leafage, foliage", "id": 1419, "trainId": 496}, + {"name": "remote control, remote", "id": 2099, "trainId": 497}, + {"name": "soap dish", "id": 2464, "trainId": 498}, + {"name": "windshield, windscreen", "id": 3058, "trainId": 499}, + {"name": "cat", "id": 430, "trainId": 500}, + {"name": "cue, cue stick, pool cue, pool stick", "id": 675, "trainId": 501}, + {"name": "vent, venthole, vent-hole, blowhole", "id": 2941, "trainId": 502}, + {"name": "videos", "id": 2955, "trainId": 503}, + {"name": "shovel", "id": 2355, "trainId": 504}, + {"name": "eaves", "id": 840, "trainId": 505}, + {"name": "antenna, aerial, transmitting aerial", "id": 32, "trainId": 506}, + {"name": "shipyard", "id": 2338, "trainId": 507}, + {"name": "hen, biddy", "id": 1232, "trainId": 508}, + {"name": "traffic cone", "id": 2834, "trainId": 509}, + {"name": "washing machines", "id": 2991, "trainId": 510}, + {"name": "truck crane", "id": 2879, "trainId": 511}, + {"name": "cds", "id": 444, "trainId": 512}, + {"name": "niche", "id": 1657, "trainId": 513}, + {"name": "scoreboard", "id": 2246, "trainId": 514}, + 
{"name": "briefcase", "id": 296, "trainId": 515}, + {"name": "boot", "id": 245, "trainId": 516}, + {"name": "sweater, jumper", "id": 2661, "trainId": 517}, + {"name": "hay", "id": 1202, "trainId": 518}, + {"name": "pack", "id": 1714, "trainId": 519}, + {"name": "bottle rack", "id": 251, "trainId": 520}, + {"name": "glacier", "id": 1095, "trainId": 521}, + {"name": "pergola", "id": 1828, "trainId": 522}, + {"name": "building materials", "id": 311, "trainId": 523}, + {"name": "television camera", "id": 2732, "trainId": 524}, + {"name": "first floor", "id": 947, "trainId": 525}, + {"name": "rifle", "id": 2115, "trainId": 526}, + {"name": "tennis table", "id": 2738, "trainId": 527}, + {"name": "stadium", "id": 2525, "trainId": 528}, + {"name": "safety belt", "id": 2194, "trainId": 529}, + {"name": "cover", "id": 634, "trainId": 530}, + {"name": "dish rack", "id": 740, "trainId": 531}, + {"name": "synthesizer", "id": 2682, "trainId": 532}, + {"name": "pumpkin", "id": 2020, "trainId": 533}, + {"name": "gutter", "id": 1156, "trainId": 534}, + {"name": "fruit stand", "id": 1036, "trainId": 535}, + {"name": "ice floe, floe", "id": 1295, "trainId": 536}, + {"name": "handle, grip, handgrip, hold", "id": 1181, "trainId": 537}, + {"name": "wheelchair", "id": 3037, "trainId": 538}, + {"name": "mousepad, mouse mat", "id": 1614, "trainId": 539}, + {"name": "diploma", "id": 736, "trainId": 540}, + {"name": "fairground ride", "id": 893, "trainId": 541}, + {"name": "radio", "id": 2047, "trainId": 542}, + {"name": "hotplate", "id": 1274, "trainId": 543}, + {"name": "junk", "id": 1361, "trainId": 544}, + {"name": "wheelbarrow", "id": 3036, "trainId": 545}, + {"name": "stream", "id": 2606, "trainId": 546}, + {"name": "toll plaza", "id": 2797, "trainId": 547}, + {"name": "punching bag", "id": 2022, "trainId": 548}, + {"name": "trough", "id": 2876, "trainId": 549}, + {"name": "throne", "id": 2758, "trainId": 550}, + {"name": "chair desk", "id": 472, "trainId": 551}, + {"name": "weighbridge", "id": 3028, "trainId": 552}, + {"name": "extractor fan", "id": 882, "trainId": 553}, + {"name": "hanging clothes", "id": 1189, "trainId": 554}, + {"name": "dish, dish aerial, dish antenna, saucer", "id": 743, "trainId": 555}, + {"name": "alarm clock, alarm", "id": 3122, "trainId": 556}, + {"name": "ski lift", "id": 2401, "trainId": 557}, + {"name": "chain", "id": 468, "trainId": 558}, + {"name": "garage", "id": 1061, "trainId": 559}, + {"name": "mechanical shovel", "id": 1523, "trainId": 560}, + {"name": "wine rack", "id": 3059, "trainId": 561}, + {"name": "tramway", "id": 2843, "trainId": 562}, + {"name": "treadmill", "id": 2853, "trainId": 563}, + {"name": "menu", "id": 1529, "trainId": 564}, + {"name": "block", "id": 214, "trainId": 565}, + {"name": "well", "id": 3032, "trainId": 566}, + {"name": "witness stand", "id": 3071, "trainId": 567}, + {"name": "branch", "id": 277, "trainId": 568}, + {"name": "duck", "id": 813, "trainId": 569}, + {"name": "casserole", "id": 426, "trainId": 570}, + {"name": "frying pan", "id": 1039, "trainId": 571}, + {"name": "desk organizer", "id": 727, "trainId": 572}, + {"name": "mast", "id": 1508, "trainId": 573}, + {"name": "spectacles, specs, eyeglasses, glasses", "id": 2490, "trainId": 574}, + {"name": "service elevator", "id": 2299, "trainId": 575}, + {"name": "dollhouse", "id": 768, "trainId": 576}, + {"name": "hammock", "id": 1172, "trainId": 577}, + {"name": "clothes hanging", "id": 537, "trainId": 578}, + {"name": "photocopier", "id": 1847, "trainId": 579}, + {"name": "notepad", "id": 
1664, "trainId": 580}, + {"name": "golf cart", "id": 1110, "trainId": 581}, + {"name": "footpath", "id": 1014, "trainId": 582}, + {"name": "cross", "id": 662, "trainId": 583}, + {"name": "baptismal font", "id": 121, "trainId": 584}, + {"name": "boiler", "id": 227, "trainId": 585}, + {"name": "skip", "id": 2410, "trainId": 586}, + {"name": "rotisserie", "id": 2165, "trainId": 587}, + {"name": "tables", "id": 2696, "trainId": 588}, + {"name": "water mill", "id": 3005, "trainId": 589}, + {"name": "helmet", "id": 1231, "trainId": 590}, + {"name": "cover curtain", "id": 635, "trainId": 591}, + {"name": "brick", "id": 292, "trainId": 592}, + {"name": "table runner", "id": 2690, "trainId": 593}, + {"name": "ashtray", "id": 65, "trainId": 594}, + {"name": "street box", "id": 2607, "trainId": 595}, + {"name": "stick", "id": 2574, "trainId": 596}, + {"name": "hangers", "id": 1188, "trainId": 597}, + {"name": "cells", "id": 456, "trainId": 598}, + {"name": "urinal", "id": 2913, "trainId": 599}, + {"name": "centerpiece", "id": 459, "trainId": 600}, + {"name": "portable fridge", "id": 1955, "trainId": 601}, + {"name": "dvds", "id": 827, "trainId": 602}, + {"name": "golf club", "id": 1111, "trainId": 603}, + {"name": "skirting board", "id": 2412, "trainId": 604}, + {"name": "water cooler", "id": 2997, "trainId": 605}, + {"name": "clipboard", "id": 528, "trainId": 606}, + {"name": "camera, photographic camera", "id": 366, "trainId": 607}, + {"name": "pigeonhole", "id": 1863, "trainId": 608}, + {"name": "chips", "id": 500, "trainId": 609}, + {"name": "food processor", "id": 1001, "trainId": 610}, + {"name": "post box", "id": 1958, "trainId": 611}, + {"name": "lid", "id": 1441, "trainId": 612}, + {"name": "drum", "id": 809, "trainId": 613}, + {"name": "blender", "id": 210, "trainId": 614}, + {"name": "cave entrance", "id": 435, "trainId": 615}, + {"name": "dental chair", "id": 718, "trainId": 616}, + {"name": "obelisk", "id": 1674, "trainId": 617}, + {"name": "canoe", "id": 388, "trainId": 618}, + {"name": "mobile", "id": 1572, "trainId": 619}, + {"name": "monitors", "id": 1584, "trainId": 620}, + {"name": "pool ball", "id": 1944, "trainId": 621}, + {"name": "cue rack", "id": 674, "trainId": 622}, + {"name": "baggage carts", "id": 99, "trainId": 623}, + {"name": "shore", "id": 2352, "trainId": 624}, + {"name": "fork", "id": 1019, "trainId": 625}, + {"name": "paper filer", "id": 1763, "trainId": 626}, + {"name": "bicycle rack", "id": 185, "trainId": 627}, + {"name": "coat rack", "id": 554, "trainId": 628}, + {"name": "garland", "id": 1066, "trainId": 629}, + {"name": "sports bag", "id": 2508, "trainId": 630}, + {"name": "fish tank", "id": 951, "trainId": 631}, + {"name": "towel dispenser", "id": 2822, "trainId": 632}, + {"name": "carriage", "id": 415, "trainId": 633}, + {"name": "brochure", "id": 297, "trainId": 634}, + {"name": "plaque", "id": 1914, "trainId": 635}, + {"name": "stringer", "id": 2619, "trainId": 636}, + {"name": "iron", "id": 1338, "trainId": 637}, + {"name": "spoon", "id": 2505, "trainId": 638}, + {"name": "flag pole", "id": 955, "trainId": 639}, + {"name": "toilet brush", "id": 2786, "trainId": 640}, + {"name": "book stand", "id": 238, "trainId": 641}, + {"name": "water faucet, water tap, tap, hydrant", "id": 3000, "trainId": 642}, + {"name": "ticket office", "id": 2763, "trainId": 643}, + {"name": "broom", "id": 299, "trainId": 644}, + {"name": "dvd", "id": 822, "trainId": 645}, + {"name": "ice bucket", "id": 1288, "trainId": 646}, + {"name": "carapace, shell, cuticle, shield", "id": 
3101, "trainId": 647}, + {"name": "tureen", "id": 2894, "trainId": 648}, + {"name": "folders", "id": 992, "trainId": 649}, + {"name": "chess", "id": 489, "trainId": 650}, + {"name": "root", "id": 2157, "trainId": 651}, + {"name": "sewing machine", "id": 2309, "trainId": 652}, + {"name": "model", "id": 1576, "trainId": 653}, + {"name": "pen", "id": 1810, "trainId": 654}, + {"name": "violin", "id": 2964, "trainId": 655}, + {"name": "sweatshirt", "id": 2662, "trainId": 656}, + {"name": "recycling materials", "id": 2087, "trainId": 657}, + {"name": "mitten", "id": 1569, "trainId": 658}, + {"name": "chopping board, cutting board", "id": 503, "trainId": 659}, + {"name": "mask", "id": 1505, "trainId": 660}, + {"name": "log", "id": 1468, "trainId": 661}, + {"name": "mouse, computer mouse", "id": 1613, "trainId": 662}, + {"name": "grill", "id": 1138, "trainId": 663}, + {"name": "hole", "id": 1256, "trainId": 664}, + {"name": "target", "id": 2715, "trainId": 665}, + {"name": "trash bag", "id": 2846, "trainId": 666}, + {"name": "chalk", "id": 477, "trainId": 667}, + {"name": "sticks", "id": 2576, "trainId": 668}, + {"name": "balloon", "id": 108, "trainId": 669}, + {"name": "score", "id": 2245, "trainId": 670}, + {"name": "hair spray", "id": 1162, "trainId": 671}, + {"name": "roll", "id": 2149, "trainId": 672}, + {"name": "runner", "id": 2183, "trainId": 673}, + {"name": "engine", "id": 858, "trainId": 674}, + {"name": "inflatable glove", "id": 1324, "trainId": 675}, + {"name": "games", "id": 1055, "trainId": 676}, + {"name": "pallets", "id": 1741, "trainId": 677}, + {"name": "baskets", "id": 149, "trainId": 678}, + {"name": "coop", "id": 615, "trainId": 679}, + {"name": "dvd player", "id": 825, "trainId": 680}, + {"name": "rocking horse", "id": 2143, "trainId": 681}, + {"name": "buckets", "id": 304, "trainId": 682}, + {"name": "bread rolls", "id": 283, "trainId": 683}, + {"name": "shawl", "id": 2322, "trainId": 684}, + {"name": "watering can", "id": 3017, "trainId": 685}, + {"name": "spotlights", "id": 2510, "trainId": 686}, + {"name": "post-it", "id": 1960, "trainId": 687}, + {"name": "bowls", "id": 265, "trainId": 688}, + {"name": "security camera", "id": 2282, "trainId": 689}, + {"name": "runner cloth", "id": 2184, "trainId": 690}, + {"name": "lock", "id": 1461, "trainId": 691}, + {"name": "alarm, warning device, alarm system", "id": 3113, "trainId": 692}, + {"name": "side", "id": 2372, "trainId": 693}, + {"name": "roulette", "id": 2166, "trainId": 694}, + {"name": "bone", "id": 232, "trainId": 695}, + {"name": "cutlery", "id": 693, "trainId": 696}, + {"name": "pool balls", "id": 1945, "trainId": 697}, + {"name": "wheels", "id": 3039, "trainId": 698}, + {"name": "spice rack", "id": 2494, "trainId": 699}, + {"name": "plant pots", "id": 1908, "trainId": 700}, + {"name": "towel ring", "id": 2827, "trainId": 701}, + {"name": "bread box", "id": 280, "trainId": 702}, + {"name": "video", "id": 2950, "trainId": 703}, + {"name": "funfair", "id": 1044, "trainId": 704}, + {"name": "breads", "id": 288, "trainId": 705}, + {"name": "tripod", "id": 2863, "trainId": 706}, + {"name": "ironing board", "id": 1342, "trainId": 707}, + {"name": "skimmer", "id": 2409, "trainId": 708}, + {"name": "hollow", "id": 1258, "trainId": 709}, + {"name": "scratching post", "id": 2249, "trainId": 710}, + {"name": "tricycle", "id": 2862, "trainId": 711}, + {"name": "file box", "id": 920, "trainId": 712}, + {"name": "mountain pass", "id": 1607, "trainId": 713}, + {"name": "tombstones", "id": 2802, "trainId": 714}, + {"name": 
"cooker", "id": 610, "trainId": 715}, + {"name": "card game, cards", "id": 3129, "trainId": 716}, + {"name": "golf bag", "id": 1108, "trainId": 717}, + {"name": "towel paper", "id": 2823, "trainId": 718}, + {"name": "chaise lounge", "id": 476, "trainId": 719}, + {"name": "sun", "id": 2641, "trainId": 720}, + {"name": "toilet paper holder", "id": 2788, "trainId": 721}, + {"name": "rake", "id": 2070, "trainId": 722}, + {"name": "key", "id": 1368, "trainId": 723}, + {"name": "umbrella stand", "id": 2903, "trainId": 724}, + {"name": "dartboard", "id": 699, "trainId": 725}, + {"name": "transformer", "id": 2844, "trainId": 726}, + {"name": "fireplace utensils", "id": 942, "trainId": 727}, + {"name": "sweatshirts", "id": 2663, "trainId": 728}, + { + "name": "cellular telephone, cellular phone, cellphone, cell, mobile phone", + "id": 457, + "trainId": 729, + }, + {"name": "tallboy", "id": 2701, "trainId": 730}, + {"name": "stapler", "id": 2540, "trainId": 731}, + {"name": "sauna", "id": 2231, "trainId": 732}, + {"name": "test tube", "id": 2746, "trainId": 733}, + {"name": "palette", "id": 1738, "trainId": 734}, + {"name": "shopping carts", "id": 2350, "trainId": 735}, + {"name": "tools", "id": 2808, "trainId": 736}, + {"name": "push button, push, button", "id": 2025, "trainId": 737}, + {"name": "star", "id": 2541, "trainId": 738}, + {"name": "roof rack", "id": 2156, "trainId": 739}, + {"name": "barbed wire", "id": 126, "trainId": 740}, + {"name": "spray", "id": 2512, "trainId": 741}, + {"name": "ear", "id": 831, "trainId": 742}, + {"name": "sponge", "id": 2503, "trainId": 743}, + {"name": "racket", "id": 2039, "trainId": 744}, + {"name": "tins", "id": 2774, "trainId": 745}, + {"name": "eyeglasses", "id": 886, "trainId": 746}, + {"name": "file", "id": 919, "trainId": 747}, + {"name": "scarfs", "id": 2240, "trainId": 748}, + {"name": "sugar bowl", "id": 2636, "trainId": 749}, + {"name": "flip flop", "id": 963, "trainId": 750}, + {"name": "headstones", "id": 1218, "trainId": 751}, + {"name": "laptop bag", "id": 1406, "trainId": 752}, + {"name": "leash", "id": 1420, "trainId": 753}, + {"name": "climbing frame", "id": 526, "trainId": 754}, + {"name": "suit hanger", "id": 2639, "trainId": 755}, + {"name": "floor spotlight", "id": 975, "trainId": 756}, + {"name": "plate rack", "id": 1921, "trainId": 757}, + {"name": "sewer", "id": 2305, "trainId": 758}, + {"name": "hard drive", "id": 1193, "trainId": 759}, + {"name": "sprinkler", "id": 2517, "trainId": 760}, + {"name": "tools box", "id": 2809, "trainId": 761}, + {"name": "necklace", "id": 1647, "trainId": 762}, + {"name": "bulbs", "id": 314, "trainId": 763}, + {"name": "steel industry", "id": 2560, "trainId": 764}, + {"name": "club", "id": 545, "trainId": 765}, + {"name": "jack", "id": 1345, "trainId": 766}, + {"name": "door bars", "id": 775, "trainId": 767}, + { + "name": "control panel, instrument panel, control board, board, panel", + "id": 603, + "trainId": 768, + }, + {"name": "hairbrush", "id": 1163, "trainId": 769}, + {"name": "napkin holder", "id": 1641, "trainId": 770}, + {"name": "office", "id": 1678, "trainId": 771}, + {"name": "smoke detector", "id": 2450, "trainId": 772}, + {"name": "utensils", "id": 2915, "trainId": 773}, + {"name": "apron", "id": 42, "trainId": 774}, + {"name": "scissors", "id": 2242, "trainId": 775}, + {"name": "terminal", "id": 2741, "trainId": 776}, + {"name": "grinder", "id": 1143, "trainId": 777}, + {"name": "entry phone", "id": 862, "trainId": 778}, + {"name": "newspaper stand", "id": 1654, "trainId": 779}, + 
{"name": "pepper shaker", "id": 1826, "trainId": 780}, + {"name": "onions", "id": 1689, "trainId": 781}, + { + "name": "central processing unit, cpu, c p u , central processor, processor, mainframe", + "id": 3124, + "trainId": 782, + }, + {"name": "tape", "id": 2710, "trainId": 783}, + {"name": "bat", "id": 152, "trainId": 784}, + {"name": "coaster", "id": 549, "trainId": 785}, + {"name": "calculator", "id": 360, "trainId": 786}, + {"name": "potatoes", "id": 1982, "trainId": 787}, + {"name": "luggage rack", "id": 1478, "trainId": 788}, + {"name": "salt", "id": 2203, "trainId": 789}, + {"name": "street number", "id": 2612, "trainId": 790}, + {"name": "viewpoint", "id": 2956, "trainId": 791}, + {"name": "sword", "id": 2681, "trainId": 792}, + {"name": "cd", "id": 437, "trainId": 793}, + {"name": "rowing machine", "id": 2171, "trainId": 794}, + {"name": "plug", "id": 1933, "trainId": 795}, + {"name": "andiron, firedog, dog, dog-iron", "id": 3110, "trainId": 796}, + {"name": "pepper", "id": 1824, "trainId": 797}, + {"name": "tongs", "id": 2803, "trainId": 798}, + {"name": "bonfire", "id": 234, "trainId": 799}, + {"name": "dog dish", "id": 764, "trainId": 800}, + {"name": "belt", "id": 177, "trainId": 801}, + {"name": "dumbbells", "id": 817, "trainId": 802}, + {"name": "videocassette recorder, vcr", "id": 3145, "trainId": 803}, + {"name": "hook", "id": 1262, "trainId": 804}, + {"name": "envelopes", "id": 864, "trainId": 805}, + {"name": "shower faucet", "id": 2359, "trainId": 806}, + {"name": "watch", "id": 2992, "trainId": 807}, + {"name": "padlock", "id": 1725, "trainId": 808}, + {"name": "swimming pool ladder", "id": 2667, "trainId": 809}, + {"name": "spanners", "id": 2484, "trainId": 810}, + {"name": "gravy boat", "id": 1133, "trainId": 811}, + {"name": "notice board", "id": 1667, "trainId": 812}, + {"name": "trash bags", "id": 2847, "trainId": 813}, + {"name": "fire alarm", "id": 932, "trainId": 814}, + {"name": "ladle", "id": 1392, "trainId": 815}, + {"name": "stethoscope", "id": 2573, "trainId": 816}, + {"name": "rocket", "id": 2140, "trainId": 817}, + {"name": "funnel", "id": 1046, "trainId": 818}, + {"name": "bowling pins", "id": 264, "trainId": 819}, + {"name": "valve", "id": 2927, "trainId": 820}, + {"name": "thermometer", "id": 2752, "trainId": 821}, + {"name": "cups", "id": 679, "trainId": 822}, + {"name": "spice jar", "id": 2493, "trainId": 823}, + {"name": "night light", "id": 1658, "trainId": 824}, + {"name": "soaps", "id": 2466, "trainId": 825}, + {"name": "games table", "id": 1057, "trainId": 826}, + {"name": "slotted spoon", "id": 2444, "trainId": 827}, + {"name": "reel", "id": 2093, "trainId": 828}, + {"name": "scourer", "id": 2248, "trainId": 829}, + {"name": "sleeping robe", "id": 2432, "trainId": 830}, + {"name": "desk mat", "id": 726, "trainId": 831}, + {"name": "dumbbell", "id": 816, "trainId": 832}, + {"name": "hammer", "id": 1171, "trainId": 833}, + {"name": "tie", "id": 2766, "trainId": 834}, + {"name": "typewriter", "id": 2900, "trainId": 835}, + {"name": "shaker", "id": 2313, "trainId": 836}, + {"name": "cheese dish", "id": 488, "trainId": 837}, + {"name": "sea star", "id": 2265, "trainId": 838}, + {"name": "racquet", "id": 2043, "trainId": 839}, + {"name": "butane gas cylinder", "id": 332, "trainId": 840}, + {"name": "paper weight", "id": 1771, "trainId": 841}, + {"name": "shaving brush", "id": 2320, "trainId": 842}, + {"name": "sunglasses", "id": 2646, "trainId": 843}, + {"name": "gear shift", "id": 1089, "trainId": 844}, + {"name": "towel rail", "id": 2826, 
"trainId": 845}, + {"name": "adding machine, totalizer, totaliser", "id": 3148, "trainId": 846}, +] + +PASCAL_VOC_21_CATEGORIES = [ + {"color": [0, 0, 0], "id": 0, "isthing": 1, "name": "background"}, + {"color": [128, 0, 0], "id": 1, "isthing": 1, "name": "aeroplane"}, + {"color": [0, 128, 0], "id": 2, "isthing": 1, "name": "bicycle"}, + {"color": [128, 128, 0], "id": 3, "isthing": 1, "name": "bird"}, + {"color": [0, 0, 128], "id": 4, "isthing": 1, "name": "boat"}, + {"color": [128, 0, 128], "id": 5, "isthing": 1, "name": "bottle"}, + {"color": [0, 128, 128], "id": 6, "isthing": 1, "name": "bus"}, + {"color": [128, 128, 128], "id": 7, "isthing": 1, "name": "car"}, + {"color": [64, 0, 0], "id": 8, "isthing": 1, "name": "cat"}, + {"color": [192, 0, 0], "id": 9, "isthing": 1, "name": "chair"}, + {"color": [64, 128, 0], "id": 10, "isthing": 1, "name": "cow"}, + {"color": [192, 128, 0], "id": 11, "isthing": 1, "name": "diningtable"}, + {"color": [64, 0, 128], "id": 12, "isthing": 1, "name": "dog"}, + {"color": [192, 0, 128], "id": 13, "isthing": 1, "name": "horse"}, + {"color": [64, 128, 128], "id": 14, "isthing": 1, "name": "motorbike"}, + {"color": [192, 128, 128], "id": 15, "isthing": 1, "name": "person"}, + {"color": [0, 64, 0], "id": 16, "isthing": 1, "name": "pottedplant"}, + {"color": [128, 64, 0], "id": 17, "isthing": 1, "name": "sheep"}, + {"color": [0, 192, 0], "id": 18, "isthing": 1, "name": "sofa"}, + {"color": [128, 192, 0], "id": 19, "isthing": 1, "name": "train"}, + {"color": [0, 64, 128], "id": 20, "isthing": 1, "name": "tvmonitor"}, +] + +PASCAL_CTX_459_CATEGORIES = [ + {"color": [120, 120, 120], "id": 0, "isthing": 0, "name": "accordion"}, + {"color": [180, 120, 120], "id": 1, "isthing": 0, "name": "aeroplane"}, + {"color": [6, 230, 230], "id": 2, "isthing": 0, "name": "air conditioner"}, + {"color": [80, 50, 50], "id": 3, "isthing": 0, "name": "antenna"}, + {"color": [4, 200, 3], "id": 4, "isthing": 0, "name": "artillery"}, + {"color": [120, 120, 80], "id": 5, "isthing": 0, "name": "ashtray"}, + {"color": [140, 140, 140], "id": 6, "isthing": 0, "name": "atrium"}, + {"color": [204, 5, 255], "id": 7, "isthing": 0, "name": "baby carriage"}, + {"color": [230, 230, 230], "id": 8, "isthing": 0, "name": "bag"}, + {"color": [4, 250, 7], "id": 9, "isthing": 0, "name": "ball"}, + {"color": [224, 5, 255], "id": 10, "isthing": 0, "name": "balloon"}, + {"color": [235, 255, 7], "id": 11, "isthing": 0, "name": "bamboo weaving"}, + {"color": [150, 5, 61], "id": 12, "isthing": 0, "name": "barrel"}, + {"color": [120, 120, 70], "id": 13, "isthing": 0, "name": "baseball bat"}, + {"color": [8, 255, 51], "id": 14, "isthing": 0, "name": "basket"}, + {"color": [255, 6, 82], "id": 15, "isthing": 0, "name": "basketball backboard"}, + {"color": [143, 255, 140], "id": 16, "isthing": 0, "name": "bathtub"}, + {"color": [204, 255, 4], "id": 17, "isthing": 0, "name": "bed"}, + {"color": [255, 51, 7], "id": 18, "isthing": 0, "name": "bedclothes"}, + {"color": [204, 70, 3], "id": 19, "isthing": 0, "name": "beer"}, + {"color": [0, 102, 200], "id": 20, "isthing": 0, "name": "bell"}, + {"color": [61, 230, 250], "id": 21, "isthing": 0, "name": "bench"}, + {"color": [255, 6, 51], "id": 22, "isthing": 0, "name": "bicycle"}, + {"color": [11, 102, 255], "id": 23, "isthing": 0, "name": "binoculars"}, + {"color": [255, 7, 71], "id": 24, "isthing": 0, "name": "bird"}, + {"color": [255, 9, 224], "id": 25, "isthing": 0, "name": "bird cage"}, + {"color": [9, 7, 230], "id": 26, "isthing": 0, "name": "bird feeder"}, + 
{"color": [220, 220, 220], "id": 27, "isthing": 0, "name": "bird nest"}, + {"color": [255, 9, 92], "id": 28, "isthing": 0, "name": "blackboard"}, + {"color": [112, 9, 255], "id": 29, "isthing": 0, "name": "board"}, + {"color": [8, 255, 214], "id": 30, "isthing": 0, "name": "boat"}, + {"color": [7, 255, 224], "id": 31, "isthing": 0, "name": "bone"}, + {"color": [255, 184, 6], "id": 32, "isthing": 0, "name": "book"}, + {"color": [10, 255, 71], "id": 33, "isthing": 0, "name": "bottle"}, + {"color": [255, 41, 10], "id": 34, "isthing": 0, "name": "bottle opener"}, + {"color": [7, 255, 255], "id": 35, "isthing": 0, "name": "bowl"}, + {"color": [224, 255, 8], "id": 36, "isthing": 0, "name": "box"}, + {"color": [102, 8, 255], "id": 37, "isthing": 0, "name": "bracelet"}, + {"color": [255, 61, 6], "id": 38, "isthing": 0, "name": "brick"}, + {"color": [255, 194, 7], "id": 39, "isthing": 0, "name": "bridge"}, + {"color": [255, 122, 8], "id": 40, "isthing": 0, "name": "broom"}, + {"color": [0, 255, 20], "id": 41, "isthing": 0, "name": "brush"}, + {"color": [255, 8, 41], "id": 42, "isthing": 0, "name": "bucket"}, + {"color": [255, 5, 153], "id": 43, "isthing": 0, "name": "building"}, + {"color": [6, 51, 255], "id": 44, "isthing": 0, "name": "bus"}, + {"color": [235, 12, 255], "id": 45, "isthing": 0, "name": "cabinet"}, + {"color": [160, 150, 20], "id": 46, "isthing": 0, "name": "cabinet door"}, + {"color": [0, 163, 255], "id": 47, "isthing": 0, "name": "cage"}, + {"color": [140, 140, 140], "id": 48, "isthing": 0, "name": "cake"}, + {"color": [250, 10, 15], "id": 49, "isthing": 0, "name": "calculator"}, + {"color": [20, 255, 0], "id": 50, "isthing": 0, "name": "calendar"}, + {"color": [31, 255, 0], "id": 51, "isthing": 0, "name": "camel"}, + {"color": [255, 31, 0], "id": 52, "isthing": 0, "name": "camera"}, + {"color": [255, 224, 0], "id": 53, "isthing": 0, "name": "camera lens"}, + {"color": [153, 255, 0], "id": 54, "isthing": 0, "name": "can"}, + {"color": [0, 0, 255], "id": 55, "isthing": 0, "name": "candle"}, + {"color": [255, 71, 0], "id": 56, "isthing": 0, "name": "candle holder"}, + {"color": [0, 235, 255], "id": 57, "isthing": 0, "name": "cap"}, + {"color": [0, 173, 255], "id": 58, "isthing": 0, "name": "car"}, + {"color": [31, 0, 255], "id": 59, "isthing": 0, "name": "card"}, + {"color": [120, 120, 120], "id": 60, "isthing": 0, "name": "cart"}, + {"color": [180, 120, 120], "id": 61, "isthing": 0, "name": "case"}, + {"color": [6, 230, 230], "id": 62, "isthing": 0, "name": "casette recorder"}, + {"color": [80, 50, 50], "id": 63, "isthing": 0, "name": "cash register"}, + {"color": [4, 200, 3], "id": 64, "isthing": 0, "name": "cat"}, + {"color": [120, 120, 80], "id": 65, "isthing": 0, "name": "cd"}, + {"color": [140, 140, 140], "id": 66, "isthing": 0, "name": "cd player"}, + {"color": [204, 5, 255], "id": 67, "isthing": 0, "name": "ceiling"}, + {"color": [230, 230, 230], "id": 68, "isthing": 0, "name": "cell phone"}, + {"color": [4, 250, 7], "id": 69, "isthing": 0, "name": "cello"}, + {"color": [224, 5, 255], "id": 70, "isthing": 0, "name": "chain"}, + {"color": [235, 255, 7], "id": 71, "isthing": 0, "name": "chair"}, + {"color": [150, 5, 61], "id": 72, "isthing": 0, "name": "chessboard"}, + {"color": [120, 120, 70], "id": 73, "isthing": 0, "name": "chicken"}, + {"color": [8, 255, 51], "id": 74, "isthing": 0, "name": "chopstick"}, + {"color": [255, 6, 82], "id": 75, "isthing": 0, "name": "clip"}, + {"color": [143, 255, 140], "id": 76, "isthing": 0, "name": "clippers"}, + {"color": [204, 255, 4], 
"id": 77, "isthing": 0, "name": "clock"}, + {"color": [255, 51, 7], "id": 78, "isthing": 0, "name": "closet"}, + {"color": [204, 70, 3], "id": 79, "isthing": 0, "name": "cloth"}, + {"color": [0, 102, 200], "id": 80, "isthing": 0, "name": "clothes tree"}, + {"color": [61, 230, 250], "id": 81, "isthing": 0, "name": "coffee"}, + {"color": [255, 6, 51], "id": 82, "isthing": 0, "name": "coffee machine"}, + {"color": [11, 102, 255], "id": 83, "isthing": 0, "name": "comb"}, + {"color": [255, 7, 71], "id": 84, "isthing": 0, "name": "computer"}, + {"color": [255, 9, 224], "id": 85, "isthing": 0, "name": "concrete"}, + {"color": [9, 7, 230], "id": 86, "isthing": 0, "name": "cone"}, + {"color": [220, 220, 220], "id": 87, "isthing": 0, "name": "container"}, + {"color": [255, 9, 92], "id": 88, "isthing": 0, "name": "control booth"}, + {"color": [112, 9, 255], "id": 89, "isthing": 0, "name": "controller"}, + {"color": [8, 255, 214], "id": 90, "isthing": 0, "name": "cooker"}, + {"color": [7, 255, 224], "id": 91, "isthing": 0, "name": "copying machine"}, + {"color": [255, 184, 6], "id": 92, "isthing": 0, "name": "coral"}, + {"color": [10, 255, 71], "id": 93, "isthing": 0, "name": "cork"}, + {"color": [255, 41, 10], "id": 94, "isthing": 0, "name": "corkscrew"}, + {"color": [7, 255, 255], "id": 95, "isthing": 0, "name": "counter"}, + {"color": [224, 255, 8], "id": 96, "isthing": 0, "name": "court"}, + {"color": [102, 8, 255], "id": 97, "isthing": 0, "name": "cow"}, + {"color": [255, 61, 6], "id": 98, "isthing": 0, "name": "crabstick"}, + {"color": [255, 194, 7], "id": 99, "isthing": 0, "name": "crane"}, + {"color": [255, 122, 8], "id": 100, "isthing": 0, "name": "crate"}, + {"color": [0, 255, 20], "id": 101, "isthing": 0, "name": "cross"}, + {"color": [255, 8, 41], "id": 102, "isthing": 0, "name": "crutch"}, + {"color": [255, 5, 153], "id": 103, "isthing": 0, "name": "cup"}, + {"color": [6, 51, 255], "id": 104, "isthing": 0, "name": "curtain"}, + {"color": [235, 12, 255], "id": 105, "isthing": 0, "name": "cushion"}, + {"color": [160, 150, 20], "id": 106, "isthing": 0, "name": "cutting board"}, + {"color": [0, 163, 255], "id": 107, "isthing": 0, "name": "dais"}, + {"color": [140, 140, 140], "id": 108, "isthing": 0, "name": "disc"}, + {"color": [250, 10, 15], "id": 109, "isthing": 0, "name": "disc case"}, + {"color": [20, 255, 0], "id": 110, "isthing": 0, "name": "dishwasher"}, + {"color": [31, 255, 0], "id": 111, "isthing": 0, "name": "dock"}, + {"color": [255, 31, 0], "id": 112, "isthing": 0, "name": "dog"}, + {"color": [255, 224, 0], "id": 113, "isthing": 0, "name": "dolphin"}, + {"color": [153, 255, 0], "id": 114, "isthing": 0, "name": "door"}, + {"color": [0, 0, 255], "id": 115, "isthing": 0, "name": "drainer"}, + {"color": [255, 71, 0], "id": 116, "isthing": 0, "name": "dray"}, + {"color": [0, 235, 255], "id": 117, "isthing": 0, "name": "drink dispenser"}, + {"color": [0, 173, 255], "id": 118, "isthing": 0, "name": "drinking machine"}, + {"color": [31, 0, 255], "id": 119, "isthing": 0, "name": "drop"}, + {"color": [120, 120, 120], "id": 120, "isthing": 0, "name": "drug"}, + {"color": [180, 120, 120], "id": 121, "isthing": 0, "name": "drum"}, + {"color": [6, 230, 230], "id": 122, "isthing": 0, "name": "drum kit"}, + {"color": [80, 50, 50], "id": 123, "isthing": 0, "name": "duck"}, + {"color": [4, 200, 3], "id": 124, "isthing": 0, "name": "dumbbell"}, + {"color": [120, 120, 80], "id": 125, "isthing": 0, "name": "earphone"}, + {"color": [140, 140, 140], "id": 126, "isthing": 0, "name": "earrings"}, + 
{"color": [204, 5, 255], "id": 127, "isthing": 0, "name": "egg"}, + {"color": [230, 230, 230], "id": 128, "isthing": 0, "name": "electric fan"}, + {"color": [4, 250, 7], "id": 129, "isthing": 0, "name": "electric iron"}, + {"color": [224, 5, 255], "id": 130, "isthing": 0, "name": "electric pot"}, + {"color": [235, 255, 7], "id": 131, "isthing": 0, "name": "electric saw"}, + {"color": [150, 5, 61], "id": 132, "isthing": 0, "name": "electronic keyboard"}, + {"color": [120, 120, 70], "id": 133, "isthing": 0, "name": "engine"}, + {"color": [8, 255, 51], "id": 134, "isthing": 0, "name": "envelope"}, + {"color": [255, 6, 82], "id": 135, "isthing": 0, "name": "equipment"}, + {"color": [143, 255, 140], "id": 136, "isthing": 0, "name": "escalator"}, + {"color": [204, 255, 4], "id": 137, "isthing": 0, "name": "exhibition booth"}, + {"color": [255, 51, 7], "id": 138, "isthing": 0, "name": "extinguisher"}, + {"color": [204, 70, 3], "id": 139, "isthing": 0, "name": "eyeglass"}, + {"color": [0, 102, 200], "id": 140, "isthing": 0, "name": "fan"}, + {"color": [61, 230, 250], "id": 141, "isthing": 0, "name": "faucet"}, + {"color": [255, 6, 51], "id": 142, "isthing": 0, "name": "fax machine"}, + {"color": [11, 102, 255], "id": 143, "isthing": 0, "name": "fence"}, + {"color": [255, 7, 71], "id": 144, "isthing": 0, "name": "ferris wheel"}, + {"color": [255, 9, 224], "id": 145, "isthing": 0, "name": "fire extinguisher"}, + {"color": [9, 7, 230], "id": 146, "isthing": 0, "name": "fire hydrant"}, + {"color": [220, 220, 220], "id": 147, "isthing": 0, "name": "fire place"}, + {"color": [255, 9, 92], "id": 148, "isthing": 0, "name": "fish"}, + {"color": [112, 9, 255], "id": 149, "isthing": 0, "name": "fish tank"}, + {"color": [8, 255, 214], "id": 150, "isthing": 0, "name": "fishbowl"}, + {"color": [7, 255, 224], "id": 151, "isthing": 0, "name": "fishing net"}, + {"color": [255, 184, 6], "id": 152, "isthing": 0, "name": "fishing pole"}, + {"color": [10, 255, 71], "id": 153, "isthing": 0, "name": "flag"}, + {"color": [255, 41, 10], "id": 154, "isthing": 0, "name": "flagstaff"}, + {"color": [7, 255, 255], "id": 155, "isthing": 0, "name": "flame"}, + {"color": [224, 255, 8], "id": 156, "isthing": 0, "name": "flashlight"}, + {"color": [102, 8, 255], "id": 157, "isthing": 0, "name": "floor"}, + {"color": [255, 61, 6], "id": 158, "isthing": 0, "name": "flower"}, + {"color": [255, 194, 7], "id": 159, "isthing": 0, "name": "fly"}, + {"color": [255, 122, 8], "id": 160, "isthing": 0, "name": "foam"}, + {"color": [0, 255, 20], "id": 161, "isthing": 0, "name": "food"}, + {"color": [255, 8, 41], "id": 162, "isthing": 0, "name": "footbridge"}, + {"color": [255, 5, 153], "id": 163, "isthing": 0, "name": "forceps"}, + {"color": [6, 51, 255], "id": 164, "isthing": 0, "name": "fork"}, + {"color": [235, 12, 255], "id": 165, "isthing": 0, "name": "forklift"}, + {"color": [160, 150, 20], "id": 166, "isthing": 0, "name": "fountain"}, + {"color": [0, 163, 255], "id": 167, "isthing": 0, "name": "fox"}, + {"color": [140, 140, 140], "id": 168, "isthing": 0, "name": "frame"}, + {"color": [250, 10, 15], "id": 169, "isthing": 0, "name": "fridge"}, + {"color": [20, 255, 0], "id": 170, "isthing": 0, "name": "frog"}, + {"color": [31, 255, 0], "id": 171, "isthing": 0, "name": "fruit"}, + {"color": [255, 31, 0], "id": 172, "isthing": 0, "name": "funnel"}, + {"color": [255, 224, 0], "id": 173, "isthing": 0, "name": "furnace"}, + {"color": [153, 255, 0], "id": 174, "isthing": 0, "name": "game controller"}, + {"color": [0, 0, 255], "id": 175, 
"isthing": 0, "name": "game machine"}, + {"color": [255, 71, 0], "id": 176, "isthing": 0, "name": "gas cylinder"}, + {"color": [0, 235, 255], "id": 177, "isthing": 0, "name": "gas hood"}, + {"color": [0, 173, 255], "id": 178, "isthing": 0, "name": "gas stove"}, + {"color": [31, 0, 255], "id": 179, "isthing": 0, "name": "gift box"}, + {"color": [120, 120, 120], "id": 180, "isthing": 0, "name": "glass"}, + {"color": [180, 120, 120], "id": 181, "isthing": 0, "name": "glass marble"}, + {"color": [6, 230, 230], "id": 182, "isthing": 0, "name": "globe"}, + {"color": [80, 50, 50], "id": 183, "isthing": 0, "name": "glove"}, + {"color": [4, 200, 3], "id": 184, "isthing": 0, "name": "goal"}, + {"color": [120, 120, 80], "id": 185, "isthing": 0, "name": "grandstand"}, + {"color": [140, 140, 140], "id": 186, "isthing": 0, "name": "grass"}, + {"color": [204, 5, 255], "id": 187, "isthing": 0, "name": "gravestone"}, + {"color": [230, 230, 230], "id": 188, "isthing": 0, "name": "ground"}, + {"color": [4, 250, 7], "id": 189, "isthing": 0, "name": "guardrail"}, + {"color": [224, 5, 255], "id": 190, "isthing": 0, "name": "guitar"}, + {"color": [235, 255, 7], "id": 191, "isthing": 0, "name": "gun"}, + {"color": [150, 5, 61], "id": 192, "isthing": 0, "name": "hammer"}, + {"color": [120, 120, 70], "id": 193, "isthing": 0, "name": "hand cart"}, + {"color": [8, 255, 51], "id": 194, "isthing": 0, "name": "handle"}, + {"color": [255, 6, 82], "id": 195, "isthing": 0, "name": "handrail"}, + {"color": [143, 255, 140], "id": 196, "isthing": 0, "name": "hanger"}, + {"color": [204, 255, 4], "id": 197, "isthing": 0, "name": "hard disk drive"}, + {"color": [255, 51, 7], "id": 198, "isthing": 0, "name": "hat"}, + {"color": [204, 70, 3], "id": 199, "isthing": 0, "name": "hay"}, + {"color": [0, 102, 200], "id": 200, "isthing": 0, "name": "headphone"}, + {"color": [61, 230, 250], "id": 201, "isthing": 0, "name": "heater"}, + {"color": [255, 6, 51], "id": 202, "isthing": 0, "name": "helicopter"}, + {"color": [11, 102, 255], "id": 203, "isthing": 0, "name": "helmet"}, + {"color": [255, 7, 71], "id": 204, "isthing": 0, "name": "holder"}, + {"color": [255, 9, 224], "id": 205, "isthing": 0, "name": "hook"}, + {"color": [9, 7, 230], "id": 206, "isthing": 0, "name": "horse"}, + {"color": [220, 220, 220], "id": 207, "isthing": 0, "name": "horse-drawn carriage"}, + {"color": [255, 9, 92], "id": 208, "isthing": 0, "name": "hot-air balloon"}, + {"color": [112, 9, 255], "id": 209, "isthing": 0, "name": "hydrovalve"}, + {"color": [8, 255, 214], "id": 210, "isthing": 0, "name": "ice"}, + {"color": [7, 255, 224], "id": 211, "isthing": 0, "name": "inflator pump"}, + {"color": [255, 184, 6], "id": 212, "isthing": 0, "name": "ipod"}, + {"color": [10, 255, 71], "id": 213, "isthing": 0, "name": "iron"}, + {"color": [255, 41, 10], "id": 214, "isthing": 0, "name": "ironing board"}, + {"color": [7, 255, 255], "id": 215, "isthing": 0, "name": "jar"}, + {"color": [224, 255, 8], "id": 216, "isthing": 0, "name": "kart"}, + {"color": [102, 8, 255], "id": 217, "isthing": 0, "name": "kettle"}, + {"color": [255, 61, 6], "id": 218, "isthing": 0, "name": "key"}, + {"color": [255, 194, 7], "id": 219, "isthing": 0, "name": "keyboard"}, + {"color": [255, 122, 8], "id": 220, "isthing": 0, "name": "kitchen range"}, + {"color": [0, 255, 20], "id": 221, "isthing": 0, "name": "kite"}, + {"color": [255, 8, 41], "id": 222, "isthing": 0, "name": "knife"}, + {"color": [255, 5, 153], "id": 223, "isthing": 0, "name": "knife block"}, + {"color": [6, 51, 255], "id": 224, 
"isthing": 0, "name": "ladder"}, + {"color": [235, 12, 255], "id": 225, "isthing": 0, "name": "ladder truck"}, + {"color": [160, 150, 20], "id": 226, "isthing": 0, "name": "ladle"}, + {"color": [0, 163, 255], "id": 227, "isthing": 0, "name": "laptop"}, + {"color": [140, 140, 140], "id": 228, "isthing": 0, "name": "leaves"}, + {"color": [250, 10, 15], "id": 229, "isthing": 0, "name": "lid"}, + {"color": [20, 255, 0], "id": 230, "isthing": 0, "name": "life buoy"}, + {"color": [31, 255, 0], "id": 231, "isthing": 0, "name": "light"}, + {"color": [255, 31, 0], "id": 232, "isthing": 0, "name": "light bulb"}, + {"color": [255, 224, 0], "id": 233, "isthing": 0, "name": "lighter"}, + {"color": [153, 255, 0], "id": 234, "isthing": 0, "name": "line"}, + {"color": [0, 0, 255], "id": 235, "isthing": 0, "name": "lion"}, + {"color": [255, 71, 0], "id": 236, "isthing": 0, "name": "lobster"}, + {"color": [0, 235, 255], "id": 237, "isthing": 0, "name": "lock"}, + {"color": [0, 173, 255], "id": 238, "isthing": 0, "name": "machine"}, + {"color": [31, 0, 255], "id": 239, "isthing": 0, "name": "mailbox"}, + {"color": [120, 120, 120], "id": 240, "isthing": 0, "name": "mannequin"}, + {"color": [180, 120, 120], "id": 241, "isthing": 0, "name": "map"}, + {"color": [6, 230, 230], "id": 242, "isthing": 0, "name": "mask"}, + {"color": [80, 50, 50], "id": 243, "isthing": 0, "name": "mat"}, + {"color": [4, 200, 3], "id": 244, "isthing": 0, "name": "match book"}, + {"color": [120, 120, 80], "id": 245, "isthing": 0, "name": "mattress"}, + {"color": [140, 140, 140], "id": 246, "isthing": 0, "name": "menu"}, + {"color": [204, 5, 255], "id": 247, "isthing": 0, "name": "metal"}, + {"color": [230, 230, 230], "id": 248, "isthing": 0, "name": "meter box"}, + {"color": [4, 250, 7], "id": 249, "isthing": 0, "name": "microphone"}, + {"color": [224, 5, 255], "id": 250, "isthing": 0, "name": "microwave"}, + {"color": [235, 255, 7], "id": 251, "isthing": 0, "name": "mirror"}, + {"color": [150, 5, 61], "id": 252, "isthing": 0, "name": "missile"}, + {"color": [120, 120, 70], "id": 253, "isthing": 0, "name": "model"}, + {"color": [8, 255, 51], "id": 254, "isthing": 0, "name": "money"}, + {"color": [255, 6, 82], "id": 255, "isthing": 0, "name": "monkey"}, + {"color": [143, 255, 140], "id": 256, "isthing": 0, "name": "mop"}, + {"color": [204, 255, 4], "id": 257, "isthing": 0, "name": "motorbike"}, + {"color": [255, 51, 7], "id": 258, "isthing": 0, "name": "mountain"}, + {"color": [204, 70, 3], "id": 259, "isthing": 0, "name": "mouse"}, + {"color": [0, 102, 200], "id": 260, "isthing": 0, "name": "mouse pad"}, + {"color": [61, 230, 250], "id": 261, "isthing": 0, "name": "musical instrument"}, + {"color": [255, 6, 51], "id": 262, "isthing": 0, "name": "napkin"}, + {"color": [11, 102, 255], "id": 263, "isthing": 0, "name": "net"}, + {"color": [255, 7, 71], "id": 264, "isthing": 0, "name": "newspaper"}, + {"color": [255, 9, 224], "id": 265, "isthing": 0, "name": "oar"}, + {"color": [9, 7, 230], "id": 266, "isthing": 0, "name": "ornament"}, + {"color": [220, 220, 220], "id": 267, "isthing": 0, "name": "outlet"}, + {"color": [255, 9, 92], "id": 268, "isthing": 0, "name": "oven"}, + {"color": [112, 9, 255], "id": 269, "isthing": 0, "name": "oxygen bottle"}, + {"color": [8, 255, 214], "id": 270, "isthing": 0, "name": "pack"}, + {"color": [7, 255, 224], "id": 271, "isthing": 0, "name": "pan"}, + {"color": [255, 184, 6], "id": 272, "isthing": 0, "name": "paper"}, + {"color": [10, 255, 71], "id": 273, "isthing": 0, "name": "paper box"}, + {"color": 
[255, 41, 10], "id": 274, "isthing": 0, "name": "paper cutter"}, + {"color": [7, 255, 255], "id": 275, "isthing": 0, "name": "parachute"}, + {"color": [224, 255, 8], "id": 276, "isthing": 0, "name": "parasol"}, + {"color": [102, 8, 255], "id": 277, "isthing": 0, "name": "parterre"}, + {"color": [255, 61, 6], "id": 278, "isthing": 0, "name": "patio"}, + {"color": [255, 194, 7], "id": 279, "isthing": 0, "name": "pelage"}, + {"color": [255, 122, 8], "id": 280, "isthing": 0, "name": "pen"}, + {"color": [0, 255, 20], "id": 281, "isthing": 0, "name": "pen container"}, + {"color": [255, 8, 41], "id": 282, "isthing": 0, "name": "pencil"}, + {"color": [255, 5, 153], "id": 283, "isthing": 0, "name": "person"}, + {"color": [6, 51, 255], "id": 284, "isthing": 0, "name": "photo"}, + {"color": [235, 12, 255], "id": 285, "isthing": 0, "name": "piano"}, + {"color": [160, 150, 20], "id": 286, "isthing": 0, "name": "picture"}, + {"color": [0, 163, 255], "id": 287, "isthing": 0, "name": "pig"}, + {"color": [140, 140, 140], "id": 288, "isthing": 0, "name": "pillar"}, + {"color": [250, 10, 15], "id": 289, "isthing": 0, "name": "pillow"}, + {"color": [20, 255, 0], "id": 290, "isthing": 0, "name": "pipe"}, + {"color": [31, 255, 0], "id": 291, "isthing": 0, "name": "pitcher"}, + {"color": [255, 31, 0], "id": 292, "isthing": 0, "name": "plant"}, + {"color": [255, 224, 0], "id": 293, "isthing": 0, "name": "plastic"}, + {"color": [153, 255, 0], "id": 294, "isthing": 0, "name": "plate"}, + {"color": [0, 0, 255], "id": 295, "isthing": 0, "name": "platform"}, + {"color": [255, 71, 0], "id": 296, "isthing": 0, "name": "player"}, + {"color": [0, 235, 255], "id": 297, "isthing": 0, "name": "playground"}, + {"color": [0, 173, 255], "id": 298, "isthing": 0, "name": "pliers"}, + {"color": [31, 0, 255], "id": 299, "isthing": 0, "name": "plume"}, + {"color": [120, 120, 120], "id": 300, "isthing": 0, "name": "poker"}, + {"color": [180, 120, 120], "id": 301, "isthing": 0, "name": "poker chip"}, + {"color": [6, 230, 230], "id": 302, "isthing": 0, "name": "pole"}, + {"color": [80, 50, 50], "id": 303, "isthing": 0, "name": "pool table"}, + {"color": [4, 200, 3], "id": 304, "isthing": 0, "name": "postcard"}, + {"color": [120, 120, 80], "id": 305, "isthing": 0, "name": "poster"}, + {"color": [140, 140, 140], "id": 306, "isthing": 0, "name": "pot"}, + {"color": [204, 5, 255], "id": 307, "isthing": 0, "name": "pottedplant"}, + {"color": [230, 230, 230], "id": 308, "isthing": 0, "name": "printer"}, + {"color": [4, 250, 7], "id": 309, "isthing": 0, "name": "projector"}, + {"color": [224, 5, 255], "id": 310, "isthing": 0, "name": "pumpkin"}, + {"color": [235, 255, 7], "id": 311, "isthing": 0, "name": "rabbit"}, + {"color": [150, 5, 61], "id": 312, "isthing": 0, "name": "racket"}, + {"color": [120, 120, 70], "id": 313, "isthing": 0, "name": "radiator"}, + {"color": [8, 255, 51], "id": 314, "isthing": 0, "name": "radio"}, + {"color": [255, 6, 82], "id": 315, "isthing": 0, "name": "rail"}, + {"color": [143, 255, 140], "id": 316, "isthing": 0, "name": "rake"}, + {"color": [204, 255, 4], "id": 317, "isthing": 0, "name": "ramp"}, + {"color": [255, 51, 7], "id": 318, "isthing": 0, "name": "range hood"}, + {"color": [204, 70, 3], "id": 319, "isthing": 0, "name": "receiver"}, + {"color": [0, 102, 200], "id": 320, "isthing": 0, "name": "recorder"}, + {"color": [61, 230, 250], "id": 321, "isthing": 0, "name": "recreational machines"}, + {"color": [255, 6, 51], "id": 322, "isthing": 0, "name": "remote control"}, + {"color": [11, 102, 255], "id": 323, 
"isthing": 0, "name": "road"}, + {"color": [255, 7, 71], "id": 324, "isthing": 0, "name": "robot"}, + {"color": [255, 9, 224], "id": 325, "isthing": 0, "name": "rock"}, + {"color": [9, 7, 230], "id": 326, "isthing": 0, "name": "rocket"}, + {"color": [220, 220, 220], "id": 327, "isthing": 0, "name": "rocking horse"}, + {"color": [255, 9, 92], "id": 328, "isthing": 0, "name": "rope"}, + {"color": [112, 9, 255], "id": 329, "isthing": 0, "name": "rug"}, + {"color": [8, 255, 214], "id": 330, "isthing": 0, "name": "ruler"}, + {"color": [7, 255, 224], "id": 331, "isthing": 0, "name": "runway"}, + {"color": [255, 184, 6], "id": 332, "isthing": 0, "name": "saddle"}, + {"color": [10, 255, 71], "id": 333, "isthing": 0, "name": "sand"}, + {"color": [255, 41, 10], "id": 334, "isthing": 0, "name": "saw"}, + {"color": [7, 255, 255], "id": 335, "isthing": 0, "name": "scale"}, + {"color": [224, 255, 8], "id": 336, "isthing": 0, "name": "scanner"}, + {"color": [102, 8, 255], "id": 337, "isthing": 0, "name": "scissors"}, + {"color": [255, 61, 6], "id": 338, "isthing": 0, "name": "scoop"}, + {"color": [255, 194, 7], "id": 339, "isthing": 0, "name": "screen"}, + {"color": [255, 122, 8], "id": 340, "isthing": 0, "name": "screwdriver"}, + {"color": [0, 255, 20], "id": 341, "isthing": 0, "name": "sculpture"}, + {"color": [255, 8, 41], "id": 342, "isthing": 0, "name": "scythe"}, + {"color": [255, 5, 153], "id": 343, "isthing": 0, "name": "sewer"}, + {"color": [6, 51, 255], "id": 344, "isthing": 0, "name": "sewing machine"}, + {"color": [235, 12, 255], "id": 345, "isthing": 0, "name": "shed"}, + {"color": [160, 150, 20], "id": 346, "isthing": 0, "name": "sheep"}, + {"color": [0, 163, 255], "id": 347, "isthing": 0, "name": "shell"}, + {"color": [140, 140, 140], "id": 348, "isthing": 0, "name": "shelves"}, + {"color": [250, 10, 15], "id": 349, "isthing": 0, "name": "shoe"}, + {"color": [20, 255, 0], "id": 350, "isthing": 0, "name": "shopping cart"}, + {"color": [31, 255, 0], "id": 351, "isthing": 0, "name": "shovel"}, + {"color": [255, 31, 0], "id": 352, "isthing": 0, "name": "sidecar"}, + {"color": [255, 224, 0], "id": 353, "isthing": 0, "name": "sidewalk"}, + {"color": [153, 255, 0], "id": 354, "isthing": 0, "name": "sign"}, + {"color": [0, 0, 255], "id": 355, "isthing": 0, "name": "signal light"}, + {"color": [255, 71, 0], "id": 356, "isthing": 0, "name": "sink"}, + {"color": [0, 235, 255], "id": 357, "isthing": 0, "name": "skateboard"}, + {"color": [0, 173, 255], "id": 358, "isthing": 0, "name": "ski"}, + {"color": [31, 0, 255], "id": 359, "isthing": 0, "name": "sky"}, + {"color": [120, 120, 120], "id": 360, "isthing": 0, "name": "sled"}, + {"color": [180, 120, 120], "id": 361, "isthing": 0, "name": "slippers"}, + {"color": [6, 230, 230], "id": 362, "isthing": 0, "name": "smoke"}, + {"color": [80, 50, 50], "id": 363, "isthing": 0, "name": "snail"}, + {"color": [4, 200, 3], "id": 364, "isthing": 0, "name": "snake"}, + {"color": [120, 120, 80], "id": 365, "isthing": 0, "name": "snow"}, + {"color": [140, 140, 140], "id": 366, "isthing": 0, "name": "snowmobiles"}, + {"color": [204, 5, 255], "id": 367, "isthing": 0, "name": "sofa"}, + {"color": [230, 230, 230], "id": 368, "isthing": 0, "name": "spanner"}, + {"color": [4, 250, 7], "id": 369, "isthing": 0, "name": "spatula"}, + {"color": [224, 5, 255], "id": 370, "isthing": 0, "name": "speaker"}, + {"color": [235, 255, 7], "id": 371, "isthing": 0, "name": "speed bump"}, + {"color": [150, 5, 61], "id": 372, "isthing": 0, "name": "spice container"}, + {"color": [120, 120, 
70], "id": 373, "isthing": 0, "name": "spoon"}, + {"color": [8, 255, 51], "id": 374, "isthing": 0, "name": "sprayer"}, + {"color": [255, 6, 82], "id": 375, "isthing": 0, "name": "squirrel"}, + {"color": [143, 255, 140], "id": 376, "isthing": 0, "name": "stage"}, + {"color": [204, 255, 4], "id": 377, "isthing": 0, "name": "stair"}, + {"color": [255, 51, 7], "id": 378, "isthing": 0, "name": "stapler"}, + {"color": [204, 70, 3], "id": 379, "isthing": 0, "name": "stick"}, + {"color": [0, 102, 200], "id": 380, "isthing": 0, "name": "sticky note"}, + {"color": [61, 230, 250], "id": 381, "isthing": 0, "name": "stone"}, + {"color": [255, 6, 51], "id": 382, "isthing": 0, "name": "stool"}, + {"color": [11, 102, 255], "id": 383, "isthing": 0, "name": "stove"}, + {"color": [255, 7, 71], "id": 384, "isthing": 0, "name": "straw"}, + {"color": [255, 9, 224], "id": 385, "isthing": 0, "name": "stretcher"}, + {"color": [9, 7, 230], "id": 386, "isthing": 0, "name": "sun"}, + {"color": [220, 220, 220], "id": 387, "isthing": 0, "name": "sunglass"}, + {"color": [255, 9, 92], "id": 388, "isthing": 0, "name": "sunshade"}, + {"color": [112, 9, 255], "id": 389, "isthing": 0, "name": "surveillance camera"}, + {"color": [8, 255, 214], "id": 390, "isthing": 0, "name": "swan"}, + {"color": [7, 255, 224], "id": 391, "isthing": 0, "name": "sweeper"}, + {"color": [255, 184, 6], "id": 392, "isthing": 0, "name": "swim ring"}, + {"color": [10, 255, 71], "id": 393, "isthing": 0, "name": "swimming pool"}, + {"color": [255, 41, 10], "id": 394, "isthing": 0, "name": "swing"}, + {"color": [7, 255, 255], "id": 395, "isthing": 0, "name": "switch"}, + {"color": [224, 255, 8], "id": 396, "isthing": 0, "name": "table"}, + {"color": [102, 8, 255], "id": 397, "isthing": 0, "name": "tableware"}, + {"color": [255, 61, 6], "id": 398, "isthing": 0, "name": "tank"}, + {"color": [255, 194, 7], "id": 399, "isthing": 0, "name": "tap"}, + {"color": [255, 122, 8], "id": 400, "isthing": 0, "name": "tape"}, + {"color": [0, 255, 20], "id": 401, "isthing": 0, "name": "tarp"}, + {"color": [255, 8, 41], "id": 402, "isthing": 0, "name": "telephone"}, + {"color": [255, 5, 153], "id": 403, "isthing": 0, "name": "telephone booth"}, + {"color": [6, 51, 255], "id": 404, "isthing": 0, "name": "tent"}, + {"color": [235, 12, 255], "id": 405, "isthing": 0, "name": "tire"}, + {"color": [160, 150, 20], "id": 406, "isthing": 0, "name": "toaster"}, + {"color": [0, 163, 255], "id": 407, "isthing": 0, "name": "toilet"}, + {"color": [140, 140, 140], "id": 408, "isthing": 0, "name": "tong"}, + {"color": [250, 10, 15], "id": 409, "isthing": 0, "name": "tool"}, + {"color": [20, 255, 0], "id": 410, "isthing": 0, "name": "toothbrush"}, + {"color": [31, 255, 0], "id": 411, "isthing": 0, "name": "towel"}, + {"color": [255, 31, 0], "id": 412, "isthing": 0, "name": "toy"}, + {"color": [255, 224, 0], "id": 413, "isthing": 0, "name": "toy car"}, + {"color": [153, 255, 0], "id": 414, "isthing": 0, "name": "track"}, + {"color": [0, 0, 255], "id": 415, "isthing": 0, "name": "train"}, + {"color": [255, 71, 0], "id": 416, "isthing": 0, "name": "trampoline"}, + {"color": [0, 235, 255], "id": 417, "isthing": 0, "name": "trash bin"}, + {"color": [0, 173, 255], "id": 418, "isthing": 0, "name": "tray"}, + {"color": [31, 0, 255], "id": 419, "isthing": 0, "name": "tree"}, + {"color": [120, 120, 120], "id": 420, "isthing": 0, "name": "tricycle"}, + {"color": [180, 120, 120], "id": 421, "isthing": 0, "name": "tripod"}, + {"color": [6, 230, 230], "id": 422, "isthing": 0, "name": "trophy"}, + 
{"color": [80, 50, 50], "id": 423, "isthing": 0, "name": "truck"}, + {"color": [4, 200, 3], "id": 424, "isthing": 0, "name": "tube"}, + {"color": [120, 120, 80], "id": 425, "isthing": 0, "name": "turtle"}, + {"color": [140, 140, 140], "id": 426, "isthing": 0, "name": "tvmonitor"}, + {"color": [204, 5, 255], "id": 427, "isthing": 0, "name": "tweezers"}, + {"color": [230, 230, 230], "id": 428, "isthing": 0, "name": "typewriter"}, + {"color": [4, 250, 7], "id": 429, "isthing": 0, "name": "umbrella"}, + {"color": [224, 5, 255], "id": 430, "isthing": 0, "name": "unknown"}, + {"color": [235, 255, 7], "id": 431, "isthing": 0, "name": "vacuum cleaner"}, + {"color": [150, 5, 61], "id": 432, "isthing": 0, "name": "vending machine"}, + {"color": [120, 120, 70], "id": 433, "isthing": 0, "name": "video camera"}, + {"color": [8, 255, 51], "id": 434, "isthing": 0, "name": "video game console"}, + {"color": [255, 6, 82], "id": 435, "isthing": 0, "name": "video player"}, + {"color": [143, 255, 140], "id": 436, "isthing": 0, "name": "video tape"}, + {"color": [204, 255, 4], "id": 437, "isthing": 0, "name": "violin"}, + {"color": [255, 51, 7], "id": 438, "isthing": 0, "name": "wakeboard"}, + {"color": [204, 70, 3], "id": 439, "isthing": 0, "name": "wall"}, + {"color": [0, 102, 200], "id": 440, "isthing": 0, "name": "wallet"}, + {"color": [61, 230, 250], "id": 441, "isthing": 0, "name": "wardrobe"}, + {"color": [255, 6, 51], "id": 442, "isthing": 0, "name": "washing machine"}, + {"color": [11, 102, 255], "id": 443, "isthing": 0, "name": "watch"}, + {"color": [255, 7, 71], "id": 444, "isthing": 0, "name": "water"}, + {"color": [255, 9, 224], "id": 445, "isthing": 0, "name": "water dispenser"}, + {"color": [9, 7, 230], "id": 446, "isthing": 0, "name": "water pipe"}, + {"color": [220, 220, 220], "id": 447, "isthing": 0, "name": "water skate board"}, + {"color": [255, 9, 92], "id": 448, "isthing": 0, "name": "watermelon"}, + {"color": [112, 9, 255], "id": 449, "isthing": 0, "name": "whale"}, + {"color": [8, 255, 214], "id": 450, "isthing": 0, "name": "wharf"}, + {"color": [7, 255, 224], "id": 451, "isthing": 0, "name": "wheel"}, + {"color": [255, 184, 6], "id": 452, "isthing": 0, "name": "wheelchair"}, + {"color": [10, 255, 71], "id": 453, "isthing": 0, "name": "window"}, + {"color": [255, 41, 10], "id": 454, "isthing": 0, "name": "window blinds"}, + {"color": [7, 255, 255], "id": 455, "isthing": 0, "name": "wineglass"}, + {"color": [224, 255, 8], "id": 456, "isthing": 0, "name": "wire"}, + {"color": [102, 8, 255], "id": 457, "isthing": 0, "name": "wood"}, + {"color": [255, 61, 6], "id": 458, "isthing": 0, "name": "wool"}, +] + +PASCAL_CTX_59_CATEGORIES = [ + {"color": [180, 120, 120], "id": 0, "isthing": 0, "name": "aeroplane"}, + {"color": [6, 230, 230], "id": 1, "isthing": 0, "name": "bag"}, + {"color": [80, 50, 50], "id": 2, "isthing": 0, "name": "bed"}, + {"color": [4, 200, 3], "id": 3, "isthing": 0, "name": "bedclothes"}, + {"color": [120, 120, 80], "id": 4, "isthing": 0, "name": "bench"}, + {"color": [140, 140, 140], "id": 5, "isthing": 0, "name": "bicycle"}, + {"color": [204, 5, 255], "id": 6, "isthing": 0, "name": "bird"}, + {"color": [230, 230, 230], "id": 7, "isthing": 0, "name": "boat"}, + {"color": [4, 250, 7], "id": 8, "isthing": 0, "name": "book"}, + {"color": [224, 5, 255], "id": 9, "isthing": 0, "name": "bottle"}, + {"color": [235, 255, 7], "id": 10, "isthing": 0, "name": "building"}, + {"color": [150, 5, 61], "id": 11, "isthing": 0, "name": "bus"}, + {"color": [120, 120, 70], "id": 12, 
"isthing": 0, "name": "cabinet"}, + {"color": [8, 255, 51], "id": 13, "isthing": 0, "name": "car"}, + {"color": [255, 6, 82], "id": 14, "isthing": 0, "name": "cat"}, + {"color": [143, 255, 140], "id": 15, "isthing": 0, "name": "ceiling"}, + {"color": [204, 255, 4], "id": 16, "isthing": 0, "name": "chair"}, + {"color": [255, 51, 7], "id": 17, "isthing": 0, "name": "cloth"}, + {"color": [204, 70, 3], "id": 18, "isthing": 0, "name": "computer"}, + {"color": [0, 102, 200], "id": 19, "isthing": 0, "name": "cow"}, + {"color": [61, 230, 250], "id": 20, "isthing": 0, "name": "cup"}, + {"color": [255, 6, 51], "id": 21, "isthing": 0, "name": "curtain"}, + {"color": [11, 102, 255], "id": 22, "isthing": 0, "name": "dog"}, + {"color": [255, 7, 71], "id": 23, "isthing": 0, "name": "door"}, + {"color": [255, 9, 224], "id": 24, "isthing": 0, "name": "fence"}, + {"color": [9, 7, 230], "id": 25, "isthing": 0, "name": "floor"}, + {"color": [220, 220, 220], "id": 26, "isthing": 0, "name": "flower"}, + {"color": [255, 9, 92], "id": 27, "isthing": 0, "name": "food"}, + {"color": [112, 9, 255], "id": 28, "isthing": 0, "name": "grass"}, + {"color": [8, 255, 214], "id": 29, "isthing": 0, "name": "ground"}, + {"color": [7, 255, 224], "id": 30, "isthing": 0, "name": "horse"}, + {"color": [255, 184, 6], "id": 31, "isthing": 0, "name": "keyboard"}, + {"color": [10, 255, 71], "id": 32, "isthing": 0, "name": "light"}, + {"color": [255, 41, 10], "id": 33, "isthing": 0, "name": "motorbike"}, + {"color": [7, 255, 255], "id": 34, "isthing": 0, "name": "mountain"}, + {"color": [224, 255, 8], "id": 35, "isthing": 0, "name": "mouse"}, + {"color": [102, 8, 255], "id": 36, "isthing": 0, "name": "person"}, + {"color": [255, 61, 6], "id": 37, "isthing": 0, "name": "plate"}, + {"color": [255, 194, 7], "id": 38, "isthing": 0, "name": "platform"}, + {"color": [255, 122, 8], "id": 39, "isthing": 0, "name": "pottedplant"}, + {"color": [0, 255, 20], "id": 40, "isthing": 0, "name": "road"}, + {"color": [255, 8, 41], "id": 41, "isthing": 0, "name": "rock"}, + {"color": [255, 5, 153], "id": 42, "isthing": 0, "name": "sheep"}, + {"color": [6, 51, 255], "id": 43, "isthing": 0, "name": "shelves"}, + {"color": [235, 12, 255], "id": 44, "isthing": 0, "name": "sidewalk"}, + {"color": [160, 150, 20], "id": 45, "isthing": 0, "name": "sign"}, + {"color": [0, 163, 255], "id": 46, "isthing": 0, "name": "sky"}, + {"color": [140, 140, 140], "id": 47, "isthing": 0, "name": "snow"}, + {"color": [250, 10, 15], "id": 48, "isthing": 0, "name": "sofa"}, + {"color": [20, 255, 0], "id": 49, "isthing": 0, "name": "diningtable"}, + {"color": [31, 255, 0], "id": 50, "isthing": 0, "name": "track"}, + {"color": [255, 31, 0], "id": 51, "isthing": 0, "name": "train"}, + {"color": [255, 224, 0], "id": 52, "isthing": 0, "name": "tree"}, + {"color": [153, 255, 0], "id": 53, "isthing": 0, "name": "truck"}, + {"color": [0, 0, 255], "id": 54, "isthing": 0, "name": "tvmonitor"}, + {"color": [255, 71, 0], "id": 55, "isthing": 0, "name": "wall"}, + {"color": [0, 235, 255], "id": 56, "isthing": 0, "name": "water"}, + {"color": [0, 173, 255], "id": 57, "isthing": 0, "name": "window"}, + {"color": [31, 0, 255], "id": 58, "isthing": 0, "name": "wood"}, +] + +MAPILLARY_VISTAS_SEM_SEG_CATEGORIES = [ + {'color': [165, 42, 42], + 'id': 1, + 'isthing': 1, + 'name': 'Bird', + 'supercategory': 'animal--bird'}, + {'color': [0, 192, 0], + 'id': 2, + 'isthing': 1, + 'name': 'Ground Animal', + 'supercategory': 'animal--ground-animal'}, + {'color': [196, 196, 196], + 'id': 3, + 'isthing': 0, 
+ 'name': 'Curb', + 'supercategory': 'construction--barrier--curb'}, + {'color': [190, 153, 153], + 'id': 4, + 'isthing': 0, + 'name': 'Fence', + 'supercategory': 'construction--barrier--fence'}, + {'color': [180, 165, 180], + 'id': 5, + 'isthing': 0, + 'name': 'Guard Rail', + 'supercategory': 'construction--barrier--guard-rail'}, + {'color': [90, 120, 150], + 'id': 6, + 'isthing': 0, + 'name': 'Barrier', + 'supercategory': 'construction--barrier--other-barrier'}, + {'color': [102, 102, 156], + 'id': 7, + 'isthing': 0, + 'name': 'Wall', + 'supercategory': 'construction--barrier--wall'}, + {'color': [128, 64, 255], + 'id': 8, + 'isthing': 0, + 'name': 'Bike Lane', + 'supercategory': 'construction--flat--bike-lane'}, + {'color': [140, 140, 200], + 'id': 9, + 'isthing': 1, + 'name': 'Crosswalk - Plain', + 'supercategory': 'construction--flat--crosswalk-plain'}, + {'color': [170, 170, 170], + 'id': 10, + 'isthing': 0, + 'name': 'Curb Cut', + 'supercategory': 'construction--flat--curb-cut'}, + {'color': [250, 170, 160], + 'id': 11, + 'isthing': 0, + 'name': 'Parking', + 'supercategory': 'construction--flat--parking'}, + {'color': [96, 96, 96], + 'id': 12, + 'isthing': 0, + 'name': 'Pedestrian Area', + 'supercategory': 'construction--flat--pedestrian-area'}, + {'color': [230, 150, 140], + 'id': 13, + 'isthing': 0, + 'name': 'Rail Track', + 'supercategory': 'construction--flat--rail-track'}, + {'color': [128, 64, 128], + 'id': 14, + 'isthing': 0, + 'name': 'Road', + 'supercategory': 'construction--flat--road'}, + {'color': [110, 110, 110], + 'id': 15, + 'isthing': 0, + 'name': 'Service Lane', + 'supercategory': 'construction--flat--service-lane'}, + {'color': [244, 35, 232], + 'id': 16, + 'isthing': 0, + 'name': 'Sidewalk', + 'supercategory': 'construction--flat--sidewalk'}, + {'color': [150, 100, 100], + 'id': 17, + 'isthing': 0, + 'name': 'Bridge', + 'supercategory': 'construction--structure--bridge'}, + {'color': [70, 70, 70], + 'id': 18, + 'isthing': 0, + 'name': 'Building', + 'supercategory': 'construction--structure--building'}, + {'color': [150, 120, 90], + 'id': 19, + 'isthing': 0, + 'name': 'Tunnel', + 'supercategory': 'construction--structure--tunnel'}, + {'color': [220, 20, 60], + 'id': 20, + 'isthing': 1, + 'name': 'Person', + 'supercategory': 'human--person'}, + {'color': [255, 0, 0], + 'id': 21, + 'isthing': 1, + 'name': 'Bicyclist', + 'supercategory': 'human--rider--bicyclist'}, + {'color': [255, 0, 100], + 'id': 22, + 'isthing': 1, + 'name': 'Motorcyclist', + 'supercategory': 'human--rider--motorcyclist'}, + {'color': [255, 0, 200], + 'id': 23, + 'isthing': 1, + 'name': 'Other Rider', + 'supercategory': 'human--rider--other-rider'}, + {'color': [200, 128, 128], + 'id': 24, + 'isthing': 1, + 'name': 'Lane Marking - Crosswalk', + 'supercategory': 'marking--crosswalk-zebra'}, + {'color': [255, 255, 255], + 'id': 25, + 'isthing': 0, + 'name': 'Lane Marking - General', + 'supercategory': 'marking--general'}, + {'color': [64, 170, 64], + 'id': 26, + 'isthing': 0, + 'name': 'Mountain', + 'supercategory': 'nature--mountain'}, + {'color': [230, 160, 50], + 'id': 27, + 'isthing': 0, + 'name': 'Sand', + 'supercategory': 'nature--sand'}, + {'color': [70, 130, 180], + 'id': 28, + 'isthing': 0, + 'name': 'Sky', + 'supercategory': 'nature--sky'}, + {'color': [190, 255, 255], + 'id': 29, + 'isthing': 0, + 'name': 'Snow', + 'supercategory': 'nature--snow'}, + {'color': [152, 251, 152], + 'id': 30, + 'isthing': 0, + 'name': 'Terrain', + 'supercategory': 'nature--terrain'}, + {'color': [107, 142, 
35], + 'id': 31, + 'isthing': 0, + 'name': 'Vegetation', + 'supercategory': 'nature--vegetation'}, + {'color': [0, 170, 30], + 'id': 32, + 'isthing': 0, + 'name': 'Water', + 'supercategory': 'nature--water'}, + {'color': [255, 255, 128], + 'id': 33, + 'isthing': 1, + 'name': 'Banner', + 'supercategory': 'object--banner'}, + {'color': [250, 0, 30], + 'id': 34, + 'isthing': 1, + 'name': 'Bench', + 'supercategory': 'object--bench'}, + {'color': [100, 140, 180], + 'id': 35, + 'isthing': 1, + 'name': 'Bike Rack', + 'supercategory': 'object--bike-rack'}, + {'color': [220, 220, 220], + 'id': 36, + 'isthing': 1, + 'name': 'Billboard', + 'supercategory': 'object--billboard'}, + {'color': [220, 128, 128], + 'id': 37, + 'isthing': 1, + 'name': 'Catch Basin', + 'supercategory': 'object--catch-basin'}, + {'color': [222, 40, 40], + 'id': 38, + 'isthing': 1, + 'name': 'CCTV Camera', + 'supercategory': 'object--cctv-camera'}, + {'color': [100, 170, 30], + 'id': 39, + 'isthing': 1, + 'name': 'Fire Hydrant', + 'supercategory': 'object--fire-hydrant'}, + {'color': [40, 40, 40], + 'id': 40, + 'isthing': 1, + 'name': 'Junction Box', + 'supercategory': 'object--junction-box'}, + {'color': [33, 33, 33], + 'id': 41, + 'isthing': 1, + 'name': 'Mailbox', + 'supercategory': 'object--mailbox'}, + {'color': [100, 128, 160], + 'id': 42, + 'isthing': 1, + 'name': 'Manhole', + 'supercategory': 'object--manhole'}, + {'color': [142, 0, 0], + 'id': 43, + 'isthing': 1, + 'name': 'Phone Booth', + 'supercategory': 'object--phone-booth'}, + {'color': [70, 100, 150], + 'id': 44, + 'isthing': 0, + 'name': 'Pothole', + 'supercategory': 'object--pothole'}, + {'color': [210, 170, 100], + 'id': 45, + 'isthing': 1, + 'name': 'Street Light', + 'supercategory': 'object--street-light'}, + {'color': [153, 153, 153], + 'id': 46, + 'isthing': 1, + 'name': 'Pole', + 'supercategory': 'object--support--pole'}, + {'color': [128, 128, 128], + 'id': 47, + 'isthing': 1, + 'name': 'Traffic Sign Frame', + 'supercategory': 'object--support--traffic-sign-frame'}, + {'color': [0, 0, 80], + 'id': 48, + 'isthing': 1, + 'name': 'Utility Pole', + 'supercategory': 'object--support--utility-pole'}, + {'color': [250, 170, 30], + 'id': 49, + 'isthing': 1, + 'name': 'Traffic Light', + 'supercategory': 'object--traffic-light'}, + {'color': [192, 192, 192], + 'id': 50, + 'isthing': 1, + 'name': 'Traffic Sign (Back)', + 'supercategory': 'object--traffic-sign--back'}, + {'color': [220, 220, 0], + 'id': 51, + 'isthing': 1, + 'name': 'Traffic Sign (Front)', + 'supercategory': 'object--traffic-sign--front'}, + {'color': [140, 140, 20], + 'id': 52, + 'isthing': 1, + 'name': 'Trash Can', + 'supercategory': 'object--trash-can'}, + {'color': [119, 11, 32], + 'id': 53, + 'isthing': 1, + 'name': 'Bicycle', + 'supercategory': 'object--vehicle--bicycle'}, + {'color': [150, 0, 255], + 'id': 54, + 'isthing': 1, + 'name': 'Boat', + 'supercategory': 'object--vehicle--boat'}, + {'color': [0, 60, 100], + 'id': 55, + 'isthing': 1, + 'name': 'Bus', + 'supercategory': 'object--vehicle--bus'}, + {'color': [0, 0, 142], + 'id': 56, + 'isthing': 1, + 'name': 'Car', + 'supercategory': 'object--vehicle--car'}, + {'color': [0, 0, 90], + 'id': 57, + 'isthing': 1, + 'name': 'Caravan', + 'supercategory': 'object--vehicle--caravan'}, + {'color': [0, 0, 230], + 'id': 58, + 'isthing': 1, + 'name': 'Motorcycle', + 'supercategory': 'object--vehicle--motorcycle'}, + {'color': [0, 80, 100], + 'id': 59, + 'isthing': 0, + 'name': 'On Rails', + 'supercategory': 'object--vehicle--on-rails'}, + {'color': 
[128, 64, 64], + 'id': 60, + 'isthing': 1, + 'name': 'Other Vehicle', + 'supercategory': 'object--vehicle--other-vehicle'}, + {'color': [0, 0, 110], + 'id': 61, + 'isthing': 1, + 'name': 'Trailer', + 'supercategory': 'object--vehicle--trailer'}, + {'color': [0, 0, 70], + 'id': 62, + 'isthing': 1, + 'name': 'Truck', + 'supercategory': 'object--vehicle--truck'}, + {'color': [0, 0, 192], + 'id': 63, + 'isthing': 1, + 'name': 'Wheeled Slow', + 'supercategory': 'object--vehicle--wheeled-slow'}, + {'color': [32, 32, 32], + 'id': 64, + 'isthing': 0, + 'name': 'Car Mount', + 'supercategory': 'void--car-mount'}, + {'color': [120, 10, 10], + 'id': 65, + 'isthing': 0, + 'name': 'Ego Vehicle', + 'supercategory': 'void--ego-vehicle'} +] + +COCO_STUFF_CATEGORIES = [ + {"color": [220, 20, 60], "isthing": 1, "id": 1, "name": "person"}, + {"color": [119, 11, 32], "isthing": 1, "id": 2, "name": "bicycle"}, + {"color": [0, 0, 142], "isthing": 1, "id": 3, "name": "car"}, + {"color": [0, 0, 230], "isthing": 1, "id": 4, "name": "motorcycle"}, + {"color": [106, 0, 228], "isthing": 1, "id": 5, "name": "airplane"}, + {"color": [0, 60, 100], "isthing": 1, "id": 6, "name": "bus"}, + {"color": [0, 80, 100], "isthing": 1, "id": 7, "name": "train"}, + {"color": [0, 0, 70], "isthing": 1, "id": 8, "name": "truck"}, + {"color": [0, 0, 192], "isthing": 1, "id": 9, "name": "boat"}, + {"color": [250, 170, 30], "isthing": 1, "id": 10, "name": "traffic light"}, + {"color": [100, 170, 30], "isthing": 1, "id": 11, "name": "fire hydrant"}, + {"color": [220, 220, 0], "isthing": 1, "id": 13, "name": "stop sign"}, + {"color": [175, 116, 175], "isthing": 1, "id": 14, "name": "parking meter"}, + {"color": [250, 0, 30], "isthing": 1, "id": 15, "name": "bench"}, + {"color": [165, 42, 42], "isthing": 1, "id": 16, "name": "bird"}, + {"color": [255, 77, 255], "isthing": 1, "id": 17, "name": "cat"}, + {"color": [0, 226, 252], "isthing": 1, "id": 18, "name": "dog"}, + {"color": [182, 182, 255], "isthing": 1, "id": 19, "name": "horse"}, + {"color": [0, 82, 0], "isthing": 1, "id": 20, "name": "sheep"}, + {"color": [120, 166, 157], "isthing": 1, "id": 21, "name": "cow"}, + {"color": [110, 76, 0], "isthing": 1, "id": 22, "name": "elephant"}, + {"color": [174, 57, 255], "isthing": 1, "id": 23, "name": "bear"}, + {"color": [199, 100, 0], "isthing": 1, "id": 24, "name": "zebra"}, + {"color": [72, 0, 118], "isthing": 1, "id": 25, "name": "giraffe"}, + {"color": [255, 179, 240], "isthing": 1, "id": 27, "name": "backpack"}, + {"color": [0, 125, 92], "isthing": 1, "id": 28, "name": "umbrella"}, + {"color": [209, 0, 151], "isthing": 1, "id": 31, "name": "handbag"}, + {"color": [188, 208, 182], "isthing": 1, "id": 32, "name": "tie"}, + {"color": [0, 220, 176], "isthing": 1, "id": 33, "name": "suitcase"}, + {"color": [255, 99, 164], "isthing": 1, "id": 34, "name": "frisbee"}, + {"color": [92, 0, 73], "isthing": 1, "id": 35, "name": "skis"}, + {"color": [133, 129, 255], "isthing": 1, "id": 36, "name": "snowboard"}, + {"color": [78, 180, 255], "isthing": 1, "id": 37, "name": "sports ball"}, + {"color": [0, 228, 0], "isthing": 1, "id": 38, "name": "kite"}, + {"color": [174, 255, 243], "isthing": 1, "id": 39, "name": "baseball bat"}, + {"color": [45, 89, 255], "isthing": 1, "id": 40, "name": "baseball glove"}, + {"color": [134, 134, 103], "isthing": 1, "id": 41, "name": "skateboard"}, + {"color": [145, 148, 174], "isthing": 1, "id": 42, "name": "surfboard"}, + {"color": [255, 208, 186], "isthing": 1, "id": 43, "name": "tennis racket"}, + {"color": [197, 226, 
255], "isthing": 1, "id": 44, "name": "bottle"}, + {"color": [171, 134, 1], "isthing": 1, "id": 46, "name": "wine glass"}, + {"color": [109, 63, 54], "isthing": 1, "id": 47, "name": "cup"}, + {"color": [207, 138, 255], "isthing": 1, "id": 48, "name": "fork"}, + {"color": [151, 0, 95], "isthing": 1, "id": 49, "name": "knife"}, + {"color": [9, 80, 61], "isthing": 1, "id": 50, "name": "spoon"}, + {"color": [84, 105, 51], "isthing": 1, "id": 51, "name": "bowl"}, + {"color": [74, 65, 105], "isthing": 1, "id": 52, "name": "banana"}, + {"color": [166, 196, 102], "isthing": 1, "id": 53, "name": "apple"}, + {"color": [208, 195, 210], "isthing": 1, "id": 54, "name": "sandwich"}, + {"color": [255, 109, 65], "isthing": 1, "id": 55, "name": "orange"}, + {"color": [0, 143, 149], "isthing": 1, "id": 56, "name": "broccoli"}, + {"color": [179, 0, 194], "isthing": 1, "id": 57, "name": "carrot"}, + {"color": [209, 99, 106], "isthing": 1, "id": 58, "name": "hot dog"}, + {"color": [5, 121, 0], "isthing": 1, "id": 59, "name": "pizza"}, + {"color": [227, 255, 205], "isthing": 1, "id": 60, "name": "donut"}, + {"color": [147, 186, 208], "isthing": 1, "id": 61, "name": "cake"}, + {"color": [153, 69, 1], "isthing": 1, "id": 62, "name": "chair"}, + {"color": [3, 95, 161], "isthing": 1, "id": 63, "name": "couch"}, + {"color": [163, 255, 0], "isthing": 1, "id": 64, "name": "potted plant"}, + {"color": [119, 0, 170], "isthing": 1, "id": 65, "name": "bed"}, + {"color": [0, 182, 199], "isthing": 1, "id": 67, "name": "dining table"}, + {"color": [0, 165, 120], "isthing": 1, "id": 70, "name": "toilet"}, + {"color": [183, 130, 88], "isthing": 1, "id": 72, "name": "tv"}, + {"color": [95, 32, 0], "isthing": 1, "id": 73, "name": "laptop"}, + {"color": [130, 114, 135], "isthing": 1, "id": 74, "name": "mouse"}, + {"color": [110, 129, 133], "isthing": 1, "id": 75, "name": "remote"}, + {"color": [166, 74, 118], "isthing": 1, "id": 76, "name": "keyboard"}, + {"color": [219, 142, 185], "isthing": 1, "id": 77, "name": "cell phone"}, + {"color": [79, 210, 114], "isthing": 1, "id": 78, "name": "microwave"}, + {"color": [178, 90, 62], "isthing": 1, "id": 79, "name": "oven"}, + {"color": [65, 70, 15], "isthing": 1, "id": 80, "name": "toaster"}, + {"color": [127, 167, 115], "isthing": 1, "id": 81, "name": "sink"}, + {"color": [59, 105, 106], "isthing": 1, "id": 82, "name": "refrigerator"}, + {"color": [142, 108, 45], "isthing": 1, "id": 84, "name": "book"}, + {"color": [196, 172, 0], "isthing": 1, "id": 85, "name": "clock"}, + {"color": [95, 54, 80], "isthing": 1, "id": 86, "name": "vase"}, + {"color": [128, 76, 255], "isthing": 1, "id": 87, "name": "scissors"}, + {"color": [201, 57, 1], "isthing": 1, "id": 88, "name": "teddy bear"}, + {"color": [246, 0, 122], "isthing": 1, "id": 89, "name": "hair drier"}, + {"color": [191, 162, 208], "isthing": 1, "id": 90, "name": "toothbrush"}, + {"id": 92, "name": "banner", "supercategory": "textile"}, + {"id": 93, "name": "blanket", "supercategory": "textile"}, + {"id": 94, "name": "branch", "supercategory": "plant"}, + {"id": 95, "name": "bridge", "supercategory": "building"}, + {"id": 96, "name": "building-other", "supercategory": "building"}, + {"id": 97, "name": "bush", "supercategory": "plant"}, + {"id": 98, "name": "cabinet", "supercategory": "furniture-stuff"}, + {"id": 99, "name": "cage", "supercategory": "structural"}, + {"id": 100, "name": "cardboard", "supercategory": "raw-material"}, + {"id": 101, "name": "carpet", "supercategory": "floor"}, + {"id": 102, "name": "ceiling-other", 
"supercategory": "ceiling"}, + {"id": 103, "name": "ceiling-tile", "supercategory": "ceiling"}, + {"id": 104, "name": "cloth", "supercategory": "textile"}, + {"id": 105, "name": "clothes", "supercategory": "textile"}, + {"id": 106, "name": "clouds", "supercategory": "sky"}, + {"id": 107, "name": "counter", "supercategory": "furniture-stuff"}, + {"id": 108, "name": "cupboard", "supercategory": "furniture-stuff"}, + {"id": 109, "name": "curtain", "supercategory": "textile"}, + {"id": 110, "name": "desk-stuff", "supercategory": "furniture-stuff"}, + {"id": 111, "name": "dirt", "supercategory": "ground"}, + {"id": 112, "name": "door-stuff", "supercategory": "furniture-stuff"}, + {"id": 113, "name": "fence", "supercategory": "structural"}, + {"id": 114, "name": "floor-marble", "supercategory": "floor"}, + {"id": 115, "name": "floor-other", "supercategory": "floor"}, + {"id": 116, "name": "floor-stone", "supercategory": "floor"}, + {"id": 117, "name": "floor-tile", "supercategory": "floor"}, + {"id": 118, "name": "floor-wood", "supercategory": "floor"}, + {"id": 119, "name": "flower", "supercategory": "plant"}, + {"id": 120, "name": "fog", "supercategory": "water"}, + {"id": 121, "name": "food-other", "supercategory": "food-stuff"}, + {"id": 122, "name": "fruit", "supercategory": "food-stuff"}, + {"id": 123, "name": "furniture-other", "supercategory": "furniture-stuff"}, + {"id": 124, "name": "grass", "supercategory": "plant"}, + {"id": 125, "name": "gravel", "supercategory": "ground"}, + {"id": 126, "name": "ground-other", "supercategory": "ground"}, + {"id": 127, "name": "hill", "supercategory": "solid"}, + {"id": 128, "name": "house", "supercategory": "building"}, + {"id": 129, "name": "leaves", "supercategory": "plant"}, + {"id": 130, "name": "light", "supercategory": "furniture-stuff"}, + {"id": 131, "name": "mat", "supercategory": "textile"}, + {"id": 132, "name": "metal", "supercategory": "raw-material"}, + {"id": 133, "name": "mirror-stuff", "supercategory": "furniture-stuff"}, + {"id": 134, "name": "moss", "supercategory": "plant"}, + {"id": 135, "name": "mountain", "supercategory": "solid"}, + {"id": 136, "name": "mud", "supercategory": "ground"}, + {"id": 137, "name": "napkin", "supercategory": "textile"}, + {"id": 138, "name": "net", "supercategory": "structural"}, + {"id": 139, "name": "paper", "supercategory": "raw-material"}, + {"id": 140, "name": "pavement", "supercategory": "ground"}, + {"id": 141, "name": "pillow", "supercategory": "textile"}, + {"id": 142, "name": "plant-other", "supercategory": "plant"}, + {"id": 143, "name": "plastic", "supercategory": "raw-material"}, + {"id": 144, "name": "platform", "supercategory": "ground"}, + {"id": 145, "name": "playingfield", "supercategory": "ground"}, + {"id": 146, "name": "railing", "supercategory": "structural"}, + {"id": 147, "name": "railroad", "supercategory": "ground"}, + {"id": 148, "name": "river", "supercategory": "water"}, + {"id": 149, "name": "road", "supercategory": "ground"}, + {"id": 150, "name": "rock", "supercategory": "solid"}, + {"id": 151, "name": "roof", "supercategory": "building"}, + {"id": 152, "name": "rug", "supercategory": "textile"}, + {"id": 153, "name": "salad", "supercategory": "food-stuff"}, + {"id": 154, "name": "sand", "supercategory": "ground"}, + {"id": 155, "name": "sea", "supercategory": "water"}, + {"id": 156, "name": "shelf", "supercategory": "furniture-stuff"}, + {"id": 157, "name": "sky-other", "supercategory": "sky"}, + {"id": 158, "name": "skyscraper", "supercategory": "building"}, + 
{"id": 159, "name": "snow", "supercategory": "ground"}, + {"id": 160, "name": "solid-other", "supercategory": "solid"}, + {"id": 161, "name": "stairs", "supercategory": "furniture-stuff"}, + {"id": 162, "name": "stone", "supercategory": "solid"}, + {"id": 163, "name": "straw", "supercategory": "plant"}, + {"id": 164, "name": "structural-other", "supercategory": "structural"}, + {"id": 165, "name": "table", "supercategory": "furniture-stuff"}, + {"id": 166, "name": "tent", "supercategory": "building"}, + {"id": 167, "name": "textile-other", "supercategory": "textile"}, + {"id": 168, "name": "towel", "supercategory": "textile"}, + {"id": 169, "name": "tree", "supercategory": "plant"}, + {"id": 170, "name": "vegetable", "supercategory": "food-stuff"}, + {"id": 171, "name": "wall-brick", "supercategory": "wall"}, + {"id": 172, "name": "wall-concrete", "supercategory": "wall"}, + {"id": 173, "name": "wall-other", "supercategory": "wall"}, + {"id": 174, "name": "wall-panel", "supercategory": "wall"}, + {"id": 175, "name": "wall-stone", "supercategory": "wall"}, + {"id": 176, "name": "wall-tile", "supercategory": "wall"}, + {"id": 177, "name": "wall-wood", "supercategory": "wall"}, + {"id": 178, "name": "water-other", "supercategory": "water"}, + {"id": 179, "name": "waterdrops", "supercategory": "water"}, + {"id": 180, "name": "window-blind", "supercategory": "window"}, + {"id": 181, "name": "window-other", "supercategory": "window"}, + {"id": 182, "name": "wood", "supercategory": "solid"}, +] + + +def get_coco_categories_with_prompt_eng(): + COCO_CATEGORIES_ = copy.deepcopy(COCO_CATEGORIES) + coco_id_names = open('./mask_adapter/data/datasets/coco_panoptic_with_prompt_eng.txt').read().splitlines() + coco_idx = 0 + for line in coco_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + #print(COCO_CATEGORIES_[coco_idx]["name"], '->', name) + assert COCO_CATEGORIES_[coco_idx]["id"] == idx + COCO_CATEGORIES_[coco_idx]["name"] = name + coco_idx += 1 + return COCO_CATEGORIES_ + + +def get_coco_stuff_categories_with_prompt_eng(): + COCO_STUFF_CATEGORIES_ = copy.deepcopy(COCO_STUFF_CATEGORIES) + coco_id_names = open('./mask_adapter/data/datasets/coco_stuff_with_prompt_eng.txt').read().splitlines() + coco_idx = 0 + for line in coco_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + #print(COCO_STUFF_CATEGORIES_[coco_idx]["name"], '->', name) + assert COCO_STUFF_CATEGORIES_[coco_idx]["id"] == idx + COCO_STUFF_CATEGORIES_[coco_idx]["name"] = name + coco_idx += 1 + return COCO_STUFF_CATEGORIES_ + + +def get_ade20k_categories_with_prompt_eng(): + ADE20K_150_CATEGORIES_ = copy.deepcopy(ADE20K_150_CATEGORIES) + ade20k_id_names = open('./mask_adapter/data/datasets/ade20k_150_with_prompt_eng.txt').read().splitlines() + ade_idx = 0 + for line in ade20k_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + #print(ADE20K_150_CATEGORIES_[ade_idx]["name"], '->', name) + assert ADE20K_150_CATEGORIES_[ade_idx]["id"] == idx - 1 + ADE20K_150_CATEGORIES_[ade_idx]["name"] = name + ade_idx += 1 + return ADE20K_150_CATEGORIES_ + + +def get_cityscapes_categories_with_prompt_eng(): + CITYSCAPES_CATEGORIES_ = copy.deepcopy(CITYSCAPES_CATEGORIES) + cityscapes_id_names = open('./mask_adapter/data/datasets/cityscapes_with_prompt_eng.txt').read().splitlines() + cityscapes_idx = 0 + for line in cityscapes_id_names: + idx, name = line.split(':') + 
idx = int(idx) + if name == "invalid_class_id": + continue + #print(CITYSCAPES_CATEGORIES_[cityscapes_idx]["name"], '->', name) + assert CITYSCAPES_CATEGORIES_[cityscapes_idx]["trainId"] == idx + CITYSCAPES_CATEGORIES_[cityscapes_idx]["name"] = name + cityscapes_idx += 1 + return CITYSCAPES_CATEGORIES_ + +def get_ade20k_847_categories_with_prompt_eng(): + ADE20K_847_CATEGORIES_ = copy.deepcopy(ADE20K_847_CATEGORIES) + ade20k_847_id_names = open('./mask_adapter/data/datasets/ade20k_847_with_prompt_eng.txt').read().splitlines() + ade_idx = 0 + for line in ade20k_847_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + #print(ADE20K_847_CATEGORIES_[ade_idx]["name"], '->', name) + # assert ADE20K_847_CATEGORIES_[ade_idx]["id"] == idx - 1 + ADE20K_847_CATEGORIES_[ade_idx]["name"] = name + ade_idx += 1 + return ADE20K_847_CATEGORIES_ + +def get_pascal_21_categories_with_prompt_eng(): + PASCAL_VOC_21_CATEGORIES_ = copy.deepcopy(PASCAL_VOC_21_CATEGORIES) + pascal_21_id_names = open('./mask_adapter/data/datasets/pascal_voc_21_with_prompt_eng.txt').read().splitlines() + pas_idx = 0 + for line in pascal_21_id_names: + idx, name = line.split(':') + idx = int(idx) + PASCAL_VOC_21_CATEGORIES_[pas_idx]["name"] = name + pas_idx += 1 + return PASCAL_VOC_21_CATEGORIES_ + +def get_pascal_ctx_459_categories_with_prompt_eng(): + PASCAL_CTX_459_CATEGORIES_ = copy.deepcopy(PASCAL_CTX_459_CATEGORIES) + pascal_ctx_459_id_names = open('./mask_adapter/data/datasets/pascal_ctx_459_with_prompt_eng.txt').read().splitlines() + pas_idx = 0 + for line in pascal_ctx_459_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + PASCAL_CTX_459_CATEGORIES_[pas_idx]["name"] = name + pas_idx += 1 + return PASCAL_CTX_459_CATEGORIES_ + +def get_pascal_ctx_59_categories_with_prompt_eng(): + PASCAL_CTX_59_CATEGORIES_ = copy.deepcopy(PASCAL_CTX_59_CATEGORIES) + pascal_ctx_59_id_names = open('./mask_adapter/data/datasets/pascal_ctx_59_with_prompt_eng.txt').read().splitlines() + pas_idx = 0 + for line in pascal_ctx_59_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + PASCAL_CTX_59_CATEGORIES_[pas_idx]["name"] = name + pas_idx += 1 + return PASCAL_CTX_59_CATEGORIES_ + +def get_mapillary_vistas_categories_with_prompt_eng(): + MAPILLARY_VISTAS_SEM_SEG_CATEGORIES_ = copy.deepcopy(MAPILLARY_VISTAS_SEM_SEG_CATEGORIES) + mapillary_vistas_id_names = open('./mask_adapter/data/datasets/mapillary_vistas_with_prompt_eng.txt').read().splitlines() + mapillary_idx = 0 + for line in mapillary_vistas_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + MAPILLARY_VISTAS_SEM_SEG_CATEGORIES_[mapillary_idx]["name"] = name + mapillary_idx += 1 + return MAPILLARY_VISTAS_SEM_SEG_CATEGORIES_ + +def get_grand_categories_with_prompt_eng(): + GRAND_CATEGORIES_ = [] + grand_id_names = open('./mask_adapter/data/datasets/grand_with_prompt_eng.txt').read().splitlines() + for line in grand_id_names: + idx, name = line.split(':') + idx = int(idx) + if idx == 0 or name == "invalid_class_id": + continue + GRAND_CATEGORIES_.append({"id": idx, "name": name}) + return GRAND_CATEGORIES_ + +if __name__ == "__main__": + get_coco_categories_with_prompt_eng() + get_ade20k_categories_with_prompt_eng() + get_cityscapes_categories_with_prompt_eng() + get_ade20k_847_categories_with_prompt_eng() + get_pascal_21_categories_with_prompt_eng() + 
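+    # Running this module directly acts as a smoke test: every getter call here re-parses its
+    # *_with_prompt_eng.txt file, and several of them assert that the listed ids still line up
+    # with the hard-coded category definitions in this file.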
get_pascal_ctx_459_categories_with_prompt_eng() + get_pascal_ctx_59_categories_with_prompt_eng() + get_mapillary_vistas_categories_with_prompt_eng() + get_coco_stuff_categories_with_prompt_eng() + get_grand_categories_with_prompt_eng() \ No newline at end of file diff --git a/mask_adapter/data/datasets/pascal_ctx_459_with_prompt_eng.txt b/mask_adapter/data/datasets/pascal_ctx_459_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..c281c47a5ffa24dde42e6151e982ac03e0835138 --- /dev/null +++ b/mask_adapter/data/datasets/pascal_ctx_459_with_prompt_eng.txt @@ -0,0 +1,460 @@ +0:invalid_class_id +1:accordion +2:aeroplane,aeroplanes,airplanes,airplane +3:air conditioner +4:antenna +5:artillery +6:ashtray +7:atrium +8:baby carriage +9:bag,bags +10:ball +11:balloon +12:bamboo weaving +13:barrel +14:baseball bat +15:basket +16:basketball backboard +17:bathtub +18:bed,beds +19:bedclothes +20:beer +21:bell +22:bench,benches +23:bicycle,bicycles +24:binoculars +25:bird,birds +26:bird cage +27:bird feeder +28:bird nest +29:blackboard +30:board +31:boat,boats +32:bone +33:book,books +34:bottle,bottles,water bottle +35:bottle opener +36:bowl +37:box +38:bracelet +39:brick +40:bridge +41:broom +42:brush +43:bucket +44:building,buildings +45:bus,buses +46:cabinet,cabinets,drawer,drawers +47:cabinet door +48:cage +49:cake +50:calculator +51:calendar +52:camel +53:camera +54:camera lens +55:can +56:candle +57:candle holder +58:cap +59:car,cars +60:card +61:cart +62:computer case +63:casette recorder +64:cash register +65:cat,cats,kitties,kitty +66:cd +67:cd player +68:ceiling +69:cell phone +70:cello +71:chain +72:chair,chairs +73:chessboard +74:chicken +75:chopstick +76:clip +77:clippers +78:clock +79:closet +80:cloth,clothes +81:clothes tree +82:coffee +83:coffee machine +84:comb +85:computer +86:concrete +87:cone +88:container +89:control booth +90:controller +91:cooker +92:copying machine +93:coral +94:cork +95:corkscrew +96:counter +97:court +98:cow,cows +99:crabstick +100:crane +101:crate +102:cross +103:crutch +104:cup,cups +105:curtain,curtains +106:cushion +107:cutting board +108:dais +109:disc +110:disc case +111:dishwasher +112:dock +113:dog,dogs,puppy,puppies +114:dolphin +115:door,doors +116:drainer +117:dray +118:drink dispenser +119:drinking machine +120:drop +121:drug +122:drum +123:drum kit +124:duck +125:dumbbell +126:earphone +127:earrings +128:egg +129:electric fan +130:electric iron +131:electric pot +132:electric saw +133:electronic keyboard +134:engine +135:envelope +136:equipment +137:escalator +138:exhibition booth +139:extinguisher +140:eyeglass +141:fan +142:faucet +143:fax machine +144:fence,fences +145:ferris wheel +146:fire extinguisher +147:fire hydrant +148:fire place +149:fish +150:fish tank +151:fishbowl +152:fishing net +153:fishing pole +154:flag +155:flagstaff +156:flame +157:flashlight +158:floor,tile ground,carpet,rug,flooring +159:flower,flowers +160:fly +161:foam +162:food +163:footbridge +164:forceps +165:fork +166:forklift +167:fountain +168:fox +169:frame +170:fridge +171:frog +172:fruit +173:funnel +174:furnace +175:game controller +176:game machine +177:gas cylinder +178:gas hood +179:gas stove +180:gift box +181:glass +182:glass marble +183:globe +184:glove +185:goal +186:grandstand +187:grass,grasses,lawn,turf +188:gravestone +189:ground,soil,soil ground,dirt ground +190:guardrail +191:guitar +192:gun +193:hammer +194:hand cart +195:handle +196:handrail +197:hanger +198:hard disk drive +199:hat +200:hay +201:headphone 
+202:heater +203:helicopter +204:helmet +205:holder +206:hook +207:horse,horses,foal +208:horse-drawn carriage +209:hot-air balloon +210:hydrovalve +211:ice +212:inflator pump +213:ipod +214:iron +215:ironing board +216:jar +217:kart +218:kettle +219:key +220:keyboard,keyboards +221:kitchen range +222:kite +223:knife +224:knife block +225:ladder +226:ladder truck +227:ladle +228:laptop +229:leaves +230:lid +231:life buoy +232:lamp,lamps +233:light bulb +234:lighter +235:line +236:lion +237:lobster +238:lock +239:machine +240:mailbox +241:mannequin +242:map +243:mask +244:mat +245:match book +246:mattress +247:menu +248:metal +249:meter box +250:microphone +251:microwave +252:mirror +253:missile +254:model +255:money +256:monkey +257:mop +258:motorbike,motorcycle,motorbikes,motorcycles +259:mountain,mountains +260:mouse +261:mouse pad +262:musical instrument +263:napkin +264:net +265:newspaper +266:oar +267:ornament +268:outlet +269:oven +270:oxygen bottle +271:pack +272:pan +273:paper +274:paper box +275:paper cutter +276:parachute +277:parasol +278:parterre +279:patio +280:pelage +281:pen +282:pen container +283:pencil +284:person,child,girl,boy,woman,man,people,childeren,girls,boys,women,men,lady,guy,ladies,guys +285:photo +286:piano +287:picture +288:pig +289:pillar +290:pillow +291:pipe +292:pitcher +293:plant +294:plastic +295:plate,plates +296:platform,platforms +297:player +298:playground +299:pliers +300:plume +301:poker +302:poker chip +303:pole +304:pool table +305:postcard +306:poster +307:pot +308:pottedplant,pottedplants,plant pot,plant pots,planter,planters +309:printer +310:projector +311:pumpkin +312:rabbit +313:racket +314:radiator +315:radio +316:rail +317:rake +318:ramp +319:range hood +320:receiver +321:recorder +322:recreational machines +323:remote control +324:road,street,streets +325:robot +326:rock,rocks,stone,stones +327:rocket +328:rocking horse +329:rope +330:rug +331:ruler +332:runway +333:saddle +334:sand +335:saw +336:scale +337:scanner +338:scissors +339:scoop +340:screen +341:screwdriver +342:sculpture +343:scythe +344:sewer +345:sewing machine +346:shed +347:sheep +348:shell +349:shelves,shelf +350:shoe +351:shopping cart +352:shovel +353:sidecar +354:sidewalk +355:sign,signs +356:signal light +357:sink +358:skateboard +359:ski +360:sky,clouds +361:sled +362:slippers +363:smoke +364:snail +365:snake +366:snow +367:snowmobiles +368:sofa +369:spanner +370:spatula +371:speaker +372:speed bump +373:spice container +374:spoon +375:sprayer +376:squirrel +377:stage +378:stair +379:stapler +380:stick +381:sticky note +382:stone +383:stool +384:stove +385:straw +386:stretcher +387:sun +388:sunglass +389:sunshade +390:surveillance camera +391:swan +392:sweeper +393:swim ring +394:swimming pool +395:swing +396:switch +397:table,diningtable,diningtables,tables,desk,desks,side table,side tables,coffee table +398:tableware +399:tank +400:tap +401:tape +402:tarp +403:telephone +404:telephone booth +405:tent +406:tire +407:toaster +408:toilet +409:tong +410:tool +411:toothbrush +412:towel +413:toy +414:toy car +415:track,train track,railroad +416:train,trains,locomotive,locomotives,freight train +417:trampoline +418:trash bin +419:tray +420:tree,trees +421:tricycle +422:tripod +423:trophy +424:truck,trucks +425:tube +426:turtle +427:tvmonitor,monitor,tv +428:tweezers +429:typewriter +430:umbrella +431:unknown +432:vacuum cleaner +433:vending machine +434:video camera +435:video game console +436:video player +437:video tape +438:violin +439:wakeboard +440:wall,walls 
+441:wallet +442:wardrobe +443:washing machine +444:watch +445:water +446:water dispenser +447:water pipe +448:water skate board +449:watermelon +450:whale +451:wharf +452:wheel +453:wheelchair +454:window,windows +455:window blinds +456:wineglass +457:wire +458:wood piece +459:wool \ No newline at end of file diff --git a/mask_adapter/data/datasets/pascal_ctx_59_with_prompt_eng.txt b/mask_adapter/data/datasets/pascal_ctx_59_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..5bd5242d89b7bf12043cc051fa0d8f72e99110fe --- /dev/null +++ b/mask_adapter/data/datasets/pascal_ctx_59_with_prompt_eng.txt @@ -0,0 +1,60 @@ +0:invalid_class_id +1:aeroplane,aeroplanes,airplanes,airplane +2:bag,bags +3:bed,beds +4:bedclothes +5:bench,benches +6:bicycle,bicycles +7:bird,birds +8:boat,boats +9:book,books +10:bottle,bottles,water bottle +11:building,buildings +12:bus,buses +13:cabinet,cabinets,drawer,drawers +14:car,cars +15:cat,cats,kitties,kitty +16:ceiling +17:chair,chairs +18:cloth,clothes +19:computer case +20:cow,cows +21:cup,cups +22:curtain,curtains +23:dog,dogs,puppy,puppies +24:door,doors +25:fence,fences +26:floor,tile ground,carpet,rug,flooring +27:flower,flowers +28:food +29:grass,grasses,lawn,turf +30:ground,soil,soil ground,dirt ground +31:horse,horses,foal +32:keyboard,keyboards +33:lamp,lamps,bulb,bulbs +34:motorbike,motorcycle,motorbikes,motorcycles +35:mountain,mountains +36:mouse +37:person,child,girl,boy,woman,man,people,childeren,girls,boys,women,men,lady,guy,ladies,guys +38:plate,plates +39:platform,platforms +40:pottedplant,pottedplants,plant pot,plant pots,planter,planters +41:street,streets +42:rock,rocks,stone,stones +43:sheep +44:shelves,shelf +45:sidewalk +46:sign,signs +47:sky,clouds +48:snow +49:sofa +50:diningtable,diningtables,table,tables,desk,desks,side table,side tables,coffee table +51:track,train track,railroad +52:train,trains,locomotive,locomotives,freight train +53:tree,trees +54:truck,trucks +55:tvmonitor,monitor,tv +56:wall,walls +57:water +58:window,windows +59:wood piece \ No newline at end of file diff --git a/mask_adapter/data/datasets/pascal_voc_21_with_prompt_eng.txt b/mask_adapter/data/datasets/pascal_voc_21_with_prompt_eng.txt new file mode 100644 index 0000000000000000000000000000000000000000..08f525d73afec5323496a6da35825dfd24032101 --- /dev/null +++ b/mask_adapter/data/datasets/pascal_voc_21_with_prompt_eng.txt @@ -0,0 +1,21 @@ +0:background,crops,bush,shrub,tiles,pavement,rug,carpet,box,boxes,speaker,storage,painting,board,panel,poster,clock,cage,drinking glass,park,plaything,toy,fireplace,bag,bag,bed,bench,book,books,building,buildings,cabinet,drawer,ceiling,computer,computer case,cup,cups,door,fence,floor,flower,grass,lawn,turf,ground,soil,dirt,tiles,keyboard,lamp,mountain,hills,mouse,curtain,platform,sign,street,rock,stone,shelf,sidewalk,sky,clouds,snow,track,train track,tree,trees,wall,water,window,wood,woods +1:aeroplane,airplane,aeroplanes,airplanes +2:bicycle,bicycles,bike,bikes +3:bird,birds +4:boat,boats +5:bottle,bottles,water bottle +6:bus,buses +7:car,cars +8:cat,cats,kitties,kitty +9:chair,chairs +10:cow,cows,calf +11:diningtable,dining table,diningtables,dining tables,plate,plates +12:dog,dogs,puppy,puppies +13:horse,horses,foal +14:motorbike,motorcycle,motorbikes,motorcycles +15:person,child,girl,boy,woman,man,people,childeren,girls,boys,women,men,lady,guy,ladies,guys,clothes +16:pottedplant,pottedplants,plant pot,plant pots,planter,planters +17:sheep +18:sofa,sofas 
+19:train,trains,locomotive,locomotives,freight train +20:tvmonitor,monitor,tv \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_ade20k_full.py b/mask_adapter/data/datasets/register_ade20k_full.py new file mode 100644 index 0000000000000000000000000000000000000000..7f4154e6f81081da9e04e9a90c243194011591d2 --- /dev/null +++ b/mask_adapter/data/datasets/register_ade20k_full.py @@ -0,0 +1,62 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_ade20k_full.py +""" + +import os + +import numpy as np + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg + +from . import openseg_classes + +ADE20K_847_CATEGORIES = openseg_classes.get_ade20k_847_categories_with_prompt_eng() + +ADE20k_847_COLORS = [np.random.randint(256, size=3).tolist() for k in ADE20K_847_CATEGORIES] + +MetadataCatalog.get("openvocab_ade20k_full_sem_seg_train").set( + stuff_colors=ADE20k_847_COLORS[:], +) + +MetadataCatalog.get("openvocab_ade20k_full_sem_seg_val").set( + stuff_colors=ADE20k_847_COLORS[:], +) + + +def _get_ade20k_847_meta(): + # We only need class names + stuff_classes = [k["name"] for k in ADE20K_847_CATEGORIES] + assert len(stuff_classes) == 847, len(stuff_classes) + + ret = { + "stuff_classes": stuff_classes, + } + return ret + + +def register_all_ade20k_847(root): + root = os.path.join(root, "ADE20K_2021_17_01") + meta = _get_ade20k_847_meta() + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images_detectron2", dirname) + gt_dir = os.path.join(root, "annotations_detectron2", dirname) + name = f"openvocab_ade20k_full_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="tif", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=meta["stuff_classes"][:], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=65535, # NOTE: gt is saved in 16-bit TIFF images + gt_ext="tif", + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_ade20k_847(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_ade20k_instance.py b/mask_adapter/data/datasets/register_ade20k_instance.py new file mode 100644 index 0000000000000000000000000000000000000000..a158b19659c275083e102babbbf692610d3a6564 --- /dev/null +++ b/mask_adapter/data/datasets/register_ade20k_instance.py @@ -0,0 +1,61 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_ade20k_instance.py +""" + +import json +import logging +import numpy as np +import os +from PIL import Image + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets.coco import load_coco_json, register_coco_instances +from detectron2.utils.file_io import PathManager + +from . 
import openseg_classes +import copy +ADE_CATEGORIES = copy.deepcopy(openseg_classes.ADE20K_150_CATEGORIES) +ADE_CATEGORIES = [x for x in ADE_CATEGORIES if x["isthing"] == 1] + +_PREDEFINED_SPLITS = { + # point annotations without masks + "openvocab_ade20k_instance_train": ( + "ADEChallengeData2016/images/training", + "ADEChallengeData2016/ade20k_instance_train.json", + ), + "openvocab_ade20k_instance_val": ( + "ADEChallengeData2016/images/validation", + "ADEChallengeData2016/ade20k_instance_val.json", + ), +} + + +def _get_ade_instances_meta(): + thing_ids = [k["id"] for k in ADE_CATEGORIES] + assert len(thing_ids) == 100, len(thing_ids) + # Mapping from the incontiguous ADE category id to an id in [0, 99] + thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} + thing_classes = [k["name"] for k in ADE_CATEGORIES] + ret = { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes, + } + return ret + + +def register_all_ade20k_instance(root): + for key, (image_root, json_file) in _PREDEFINED_SPLITS.items(): + # Assume pre-defined datasets live in `./datasets`. + register_coco_instances( + key, + _get_ade_instances_meta(), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_ade20k_instance(_root) diff --git a/mask_adapter/data/datasets/register_ade20k_panoptic.py b/mask_adapter/data/datasets/register_ade20k_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..f84b9006867517a8a3e8074f4d04b5356c47f0aa --- /dev/null +++ b/mask_adapter/data/datasets/register_ade20k_panoptic.py @@ -0,0 +1,222 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_ade20k_panoptic.py +""" + +import json +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.utils.file_io import PathManager +from detectron2.data.datasets.coco import load_sem_seg + + +from . import openseg_classes + +ADE20K_150_CATEGORIES = openseg_classes.get_ade20k_categories_with_prompt_eng() + +ADE20k_COLORS = [k["color"] for k in ADE20K_150_CATEGORIES] + +MetadataCatalog.get("openvocab_ade20k_sem_seg_train").set( + stuff_colors=ADE20k_COLORS[:], +) + +MetadataCatalog.get("openvocab_ade20k_sem_seg_val").set( + stuff_colors=ADE20k_COLORS[:], +) + + +def load_ade20k_panoptic_json(json_file, image_dir, gt_dir, semseg_dir, meta,panoptic_name): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/coco/train2017". + gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017". + json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json". + Returns: + list[dict]: a list of dicts in Detectron2 standard format. 
(See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = True + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = False + return segment_info + + with PathManager.open(json_file) as f: + json_info = json.load(f) + + ret = [] + for ann in json_info["annotations"]: + image_id = ann["image_id"] + # TODO: currently we assume image and label has the same filename but + # different extension, and images have extension ".jpg" for COCO. Need + # to make image extension a user-provided argument if we extend this + # function to support other COCO-like datasets. + image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg") + label_file = os.path.join(gt_dir, ann["file_name"]) + sem_label_file = os.path.join(semseg_dir, ann["file_name"]) + segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]] + ret.append( + { + "file_name": image_file, + "image_id": image_id, + "pan_seg_file_name": label_file, + "sem_seg_file_name": sem_label_file, + "segments_info": segments_info, + "dataname": panoptic_name, + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"] + assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"] + assert PathManager.isfile(ret[0]["sem_seg_file_name"]), ret[0]["sem_seg_file_name"] + return ret + + +def register_ade20k_panoptic( + name, metadata, image_root, panoptic_root, semantic_root, panoptic_json, instances_json=None +): + """ + Register a "standard" version of ADE20k panoptic segmentation dataset named `name`. + The dictionaries in this registered dataset follows detectron2's standard format. + Hence it's called "standard". + Args: + name (str): the name that identifies a dataset, + e.g. "ade20k_panoptic_train" + metadata (dict): extra metadata associated with this dataset. + image_root (str): directory which contains all the images + panoptic_root (str): directory which contains panoptic annotation images in COCO format + panoptic_json (str): path to the json panoptic annotation file in COCO format + sem_seg_root (none): not used, to be consistent with + `register_coco_panoptic_separated`. 
+ instances_json (str): path to the json instance annotation file + """ + panoptic_name = name + DatasetCatalog.register( + panoptic_name, + lambda: load_ade20k_panoptic_json( + panoptic_json, image_root, panoptic_root, semantic_root, metadata, panoptic_name + ), + ) + MetadataCatalog.get(panoptic_name).set( + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + json_file=instances_json, + evaluator_type="ade20k_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **metadata, + ) + + +_PREDEFINED_SPLITS_ADE20K_PANOPTIC = { + "openvocab_ade20k_panoptic_train": ( + "ADEChallengeData2016/images/training", + "ADEChallengeData2016/ade20k_panoptic_train", + "ADEChallengeData2016/ade20k_panoptic_train.json", + "ADEChallengeData2016/annotations_detectron2/training", + "ADEChallengeData2016/ade20k_instance_train.json", + ), + "openvocab_ade20k_panoptic_val": ( + "ADEChallengeData2016/images/validation", + "ADEChallengeData2016/ade20k_panoptic_val", + "ADEChallengeData2016/ade20k_panoptic_val.json", + "ADEChallengeData2016/annotations_detectron2/validation", + "ADEChallengeData2016/ade20k_instance_val.json", + ), +} + + +def get_metadata(): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. + thing_classes = [k["name"] for k in ADE20K_150_CATEGORIES if k["isthing"] == 1] + thing_colors = [k["color"] for k in ADE20K_150_CATEGORIES if k["isthing"] == 1] + stuff_classes = [k["name"] for k in ADE20K_150_CATEGORIES] + stuff_colors = [k["color"] for k in ADE20K_150_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # Convert category id for training: + # category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the linear + # softmax classifier. + thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for i, cat in enumerate(ADE20K_150_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + # else: + # stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + # in order to use sem_seg evaluator + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + return meta + + +def register_all_ade20k_panoptic(root): + metadata = get_metadata() + for ( + prefix, + (image_root, panoptic_root, panoptic_json, semantic_root, instance_json), + ) in _PREDEFINED_SPLITS_ADE20K_PANOPTIC.items(): + # The "standard" version of COCO panoptic segmentation dataset, + # e.g. 
used by Panoptic-DeepLab + register_ade20k_panoptic( + prefix, + metadata, + os.path.join(root, image_root), + os.path.join(root, panoptic_root), + os.path.join(root, semantic_root), + os.path.join(root, panoptic_json), + os.path.join(root, instance_json), + ) + +def register_all_ade20k_semantic(root): + root = os.path.join(root, "ADEChallengeData2016") + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_detectron2", dirname) + name = f"openvocab_ade20k_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=[x["name"] for x in ADE20K_150_CATEGORIES], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_ade20k_panoptic(_root) +register_all_ade20k_semantic(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_cityscapes_panoptic.py b/mask_adapter/data/datasets/register_cityscapes_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..4f0dd97b57768d72dc79d41de5621e3c62dfd707 --- /dev/null +++ b/mask_adapter/data/datasets/register_cityscapes_panoptic.py @@ -0,0 +1,200 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/detectron2/blob/main/detectron2/data/datasets/cityscapes_panoptic.py +""" + +import json +import logging +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.utils.file_io import PathManager + +from . import openseg_classes + +CITYSCAPES_CATEGORIES = openseg_classes.get_cityscapes_categories_with_prompt_eng() + +""" +This file contains functions to register the Cityscapes panoptic dataset to the DatasetCatalog. +""" + + +logger = logging.getLogger(__name__) + + +def get_cityscapes_panoptic_files(image_dir, gt_dir, json_info): + files = [] + # scan through the directory + cities = PathManager.ls(image_dir) + logger.info(f"{len(cities)} cities found in '{image_dir}'.") + image_dict = {} + for city in cities: + city_img_dir = os.path.join(image_dir, city) + for basename in PathManager.ls(city_img_dir): + image_file = os.path.join(city_img_dir, basename) + + suffix = "_leftImg8bit.png" + assert basename.endswith(suffix), basename + basename = os.path.basename(basename)[: -len(suffix)] + + image_dict[basename] = image_file + + for ann in json_info["annotations"]: + image_file = image_dict.get(ann["image_id"], None) + assert image_file is not None, "No image {} found for annotation {}".format( + ann["image_id"], ann["file_name"] + ) + label_file = os.path.join(gt_dir, ann["file_name"]) + segments_info = ann["segments_info"] + + files.append((image_file, label_file, segments_info)) + + assert len(files), "No images found in {}".format(image_dir) + assert PathManager.isfile(files[0][0]), files[0][0] + assert PathManager.isfile(files[0][1]), files[0][1] + return files + + +def load_cityscapes_panoptic(image_dir, gt_dir, gt_json, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/cityscapes/leftImg8bit/train". + gt_dir (str): path to the raw annotations. e.g., + "~/cityscapes/gtFine/cityscapes_panoptic_train". 
+ gt_json (str): path to the json file. e.g., + "~/cityscapes/gtFine/cityscapes_panoptic_train.json". + meta (dict): dictionary containing "thing_dataset_id_to_contiguous_id" + and "stuff_dataset_id_to_contiguous_id" to map category ids to + contiguous ids for training. + + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + return segment_info + + assert os.path.exists( + gt_json + ), "Please run `python cityscapesscripts/preparation/createPanopticImgs.py` to generate label files." # noqa + with open(gt_json) as f: + json_info = json.load(f) + files = get_cityscapes_panoptic_files(image_dir, gt_dir, json_info) + ret = [] + for image_file, label_file, segments_info in files: + sem_label_file = ( + image_file.replace("leftImg8bit", "gtFine").split(".")[0] + "_labelTrainIds.png" + ) + segments_info = [_convert_category_id(x, meta) for x in segments_info] + ret.append( + { + "file_name": image_file, + "image_id": "_".join( + os.path.splitext(os.path.basename(image_file))[0].split("_")[:3] + ), + "sem_seg_file_name": sem_label_file, + "pan_seg_file_name": label_file, + "segments_info": segments_info, + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile( + ret[0]["sem_seg_file_name"] + ), "Please generate labelTrainIds.png with cityscapesscripts/preparation/createTrainIdLabelImgs.py" # noqa + assert PathManager.isfile( + ret[0]["pan_seg_file_name"] + ), "Please generate panoptic annotation with python cityscapesscripts/preparation/createPanopticImgs.py" # noqa + return ret + + +# rename to avoid conflict +_RAW_CITYSCAPES_PANOPTIC_SPLITS = { + "openvocab_cityscapes_fine_panoptic_train": ( + "cityscapes/leftImg8bit/train", + "cityscapes/gtFine/cityscapes_panoptic_train", + "cityscapes/gtFine/cityscapes_panoptic_train.json", + ), + "openvocab_cityscapes_fine_panoptic_val": ( + "cityscapes/leftImg8bit/val", + "cityscapes/gtFine/cityscapes_panoptic_val", + "cityscapes/gtFine/cityscapes_panoptic_val.json", + ), + # "cityscapes_fine_panoptic_test": not supported yet +} + + +def register_all_cityscapes_panoptic(root): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. + thing_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] + thing_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] + stuff_classes = [k["name"] for k in CITYSCAPES_CATEGORIES] + stuff_colors = [k["color"] for k in CITYSCAPES_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # There are three types of ids in cityscapes panoptic segmentation: + # (1) category id: like semantic segmentation, it is the class id for each + # pixel. 
Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the classifier + # (2) instance id: this id is used to differentiate different instances from + # the same category. For "stuff" classes, the instance id is always 0; for + # "thing" classes, the instance id starts from 1 and 0 is reserved for + # ignored instances (e.g. crowd annotation). + # (3) panoptic id: this is the compact id that encode both category and + # instance id by: category_id * 1000 + instance_id. + thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for k in CITYSCAPES_CATEGORIES: + if k["isthing"] == 1: + thing_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] + else: + stuff_dataset_id_to_contiguous_id[k["id"]] = k["trainId"] + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + for key, (image_dir, gt_dir, gt_json) in _RAW_CITYSCAPES_PANOPTIC_SPLITS.items(): + image_dir = os.path.join(root, image_dir) + gt_dir = os.path.join(root, gt_dir) + gt_json = os.path.join(root, gt_json) + + DatasetCatalog.register( + key, lambda x=image_dir, y=gt_dir, z=gt_json: load_cityscapes_panoptic(x, y, z, meta) + ) + MetadataCatalog.get(key).set( + panoptic_root=gt_dir, + image_root=image_dir, + panoptic_json=gt_json, + gt_dir=gt_dir.replace("cityscapes_panoptic_", ""), + evaluator_type="cityscapes_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **meta, + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_cityscapes_panoptic(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_coco_instance.py b/mask_adapter/data/datasets/register_coco_instance.py new file mode 100644 index 0000000000000000000000000000000000000000..c1b7f3862943b8c6ebe76520cd2532e8f510ba41 --- /dev/null +++ b/mask_adapter/data/datasets/register_coco_instance.py @@ -0,0 +1,61 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/detectron2/blob/67ac149947124670f6678e1bdd75f89dbf0dd5e7/detectron2/data/datasets/coco.py +""" + +import json +import logging +import numpy as np +import os +from PIL import Image + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets.coco import load_coco_json, register_coco_instances +from detectron2.utils.file_io import PathManager + +from . 
import openseg_classes +import copy +COCO_CATEGORIES = openseg_classes.get_coco_categories_with_prompt_eng() +COCO_CATEGORIES = [x for x in COCO_CATEGORIES if x["isthing"] == 1] + +_PREDEFINED_SPLITS = { + # point annotations without masks + "openvocab_coco_2017_train": ( + "coco/train2017", + "coco/annotations/instances_train2017.json", + ), + "openvocab_coco_2017_val": ( + "coco/val2017", + "coco/annotations/instances_val2017.json", + ), +} + + +def _get_coco_instances_meta(): + thing_ids = [k["id"] for k in COCO_CATEGORIES] + assert len(thing_ids) == 80, len(thing_ids) + # Mapping from the incontiguous ADE category id to an id in [0, 99] + thing_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(thing_ids)} + thing_classes = [k["name"] for k in COCO_CATEGORIES] + ret = { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes, + } + return ret + + +def register_all_coco_instance(root): + for key, (image_root, json_file) in _PREDEFINED_SPLITS.items(): + # Assume pre-defined datasets live in `./datasets`. + register_coco_instances( + key, + _get_coco_instances_meta(), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_coco_instance(_root) diff --git a/mask_adapter/data/datasets/register_coco_panoptic_annos_semseg.py b/mask_adapter/data/datasets/register_coco_panoptic_annos_semseg.py new file mode 100644 index 0000000000000000000000000000000000000000..6cc343b9316e5d095a0ab34af75693be1aec7895 --- /dev/null +++ b/mask_adapter/data/datasets/register_coco_panoptic_annos_semseg.py @@ -0,0 +1,196 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_coco_panoptic_annos_semseg.py +""" + +import json +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg +# from detectron2.data.datasets.builtin_meta import COCO_CATEGORIES +from . import openseg_classes + +from detectron2.utils.file_io import PathManager + + +COCO_CATEGORIES = openseg_classes.get_coco_categories_with_prompt_eng() + +_PREDEFINED_SPLITS_COCO_PANOPTIC = { + "openvocab_coco_2017_train_panoptic": ( + # This is the original panoptic annotation directory + "coco/panoptic_train2017", + "coco/annotations/panoptic_train2017.json", + # This directory contains semantic annotations that are + # converted from panoptic annotations. + # It is used by PanopticFPN. + # You can use the script at detectron2/datasets/prepare_panoptic_fpn.py + # to create these directories. + "coco/panoptic_semseg_train2017", + ), + "openvocab_coco_2017_val_panoptic": ( + "coco/panoptic_val2017", + "coco/annotations/panoptic_val2017.json", + "coco/panoptic_semseg_val2017", + ), +} + + +def get_metadata(): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. 
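The get_metadata() helper that this comment introduces builds two id maps: only "thing" categories receive an entry in thing_dataset_id_to_contiguous_id, while every category (thing or stuff) receives an entry in stuff_dataset_id_to_contiguous_id so the sem_seg evaluator can score all classes. A minimal, self-contained sketch of that behaviour with toy categories (not the real COCO list):

# Illustrative sketch only; toy categories stand in for COCO_CATEGORIES.
toy_categories = [
    {"id": 1,   "name": "person", "isthing": 1},
    {"id": 92,  "name": "banner", "isthing": 0},
    {"id": 184, "name": "tree",   "isthing": 0},
]
thing_map, stuff_map = {}, {}
for i, cat in enumerate(toy_categories):
    if cat["isthing"]:
        thing_map[cat["id"]] = i       # things only
    stuff_map[cat["id"]] = i           # every category, so the sem_seg evaluator sees all classes
assert thing_map == {1: 0}
assert stuff_map == {1: 0, 92: 1, 184: 2}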
+ thing_classes = [k["name"] for k in COCO_CATEGORIES if k["isthing"] == 1] + thing_colors = [k["color"] for k in COCO_CATEGORIES if k["isthing"] == 1] + stuff_classes = [k["name"] for k in COCO_CATEGORIES] + stuff_colors = [k["color"] for k in COCO_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # Convert category id for training: + # category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the linear + # softmax classifier. + thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + contiguous_id_to_class_name = [] + + for i, cat in enumerate(COCO_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + # else: + # stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + # in order to use sem_seg evaluator + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + contiguous_id_to_class_name.append(cat["name"]) + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + meta["contiguous_id_to_class_name"] = contiguous_id_to_class_name + + return meta + + +def load_coco_panoptic_json(json_file, image_dir, gt_dir, semseg_dir, meta, semantic_name): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/coco/train2017". + gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017". + json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json". + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = True + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = False + return segment_info + + with PathManager.open(json_file) as f: + json_info = json.load(f) + + ret = [] + for ann in json_info["annotations"]: + image_id = int(ann["image_id"]) + # TODO: currently we assume image and label has the same filename but + # different extension, and images have extension ".jpg" for COCO. Need + # to make image extension a user-provided argument if we extend this + # function to support other COCO-like datasets. + image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg") + label_file = os.path.join(gt_dir, ann["file_name"]) + sem_label_file = os.path.join(semseg_dir, ann["file_name"]) + segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]] + ret.append( + { + "file_name": image_file, + "image_id": image_id, + "pan_seg_file_name": label_file, + "sem_seg_file_name": sem_label_file, + "segments_info": segments_info, + "dataname": semantic_name, + } + ) + assert len(ret), f"No images found in {image_dir}!" 
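For orientation, a hedged usage sketch (not part of this diff) of how the split backed by this loader can be inspected once the COCO panoptic files exist under $DETECTRON2_DATASETS; it relies only on detectron2's public catalogs and the dict keys assembled above:

# Usage sketch, assuming this module has been imported and the data is on disk.
from detectron2.data import DatasetCatalog, MetadataCatalog

name = "openvocab_coco_2017_train_panoptic_with_sem_seg"   # registered later in this file
records = DatasetCatalog.get(name)      # list[dict] built by load_coco_panoptic_json
meta = MetadataCatalog.get(name)

sample = records[0]
print(sample["file_name"])              # RGB image
print(sample["pan_seg_file_name"])      # COCO panoptic PNG
print(sample["sem_seg_file_name"])      # converted semantic PNG
print(sample["dataname"])               # equals `name`; extra key added by this loader
print(len(meta.stuff_classes), meta.ignore_label)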
+ assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"] + assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"] + assert PathManager.isfile(ret[0]["sem_seg_file_name"]), ret[0]["sem_seg_file_name"] + return ret + + +def register_coco_panoptic_annos_sem_seg( + name, metadata, image_root, panoptic_root, panoptic_json, sem_seg_root, instances_json +): + panoptic_name = name + #delattr(MetadataCatalog.get(panoptic_name), "thing_classes") + #delattr(MetadataCatalog.get(panoptic_name), "thing_colors") + MetadataCatalog.get(panoptic_name).set( + thing_classes=metadata["thing_classes"], + thing_colors=metadata["thing_colors"], + # thing_dataset_id_to_contiguous_id=metadata["thing_dataset_id_to_contiguous_id"], + ) + + # the name is "coco_2017_train_panoptic_with_sem_seg" and "coco_2017_val_panoptic_with_sem_seg" + semantic_name = name + "_with_sem_seg" + DatasetCatalog.register( + semantic_name, + lambda: load_coco_panoptic_json(panoptic_json, image_root, panoptic_root, sem_seg_root, metadata, semantic_name), + ) + MetadataCatalog.get(semantic_name).set( + sem_seg_root=sem_seg_root, + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + json_file=instances_json, + evaluator_type="coco_panoptic_seg", + ignore_label=255, + label_divisor=1000, + **metadata, + ) + + +def register_all_coco_panoptic_annos_sem_seg(root): + for ( + prefix, + (panoptic_root, panoptic_json, semantic_root), + ) in _PREDEFINED_SPLITS_COCO_PANOPTIC.items(): + prefix_instances = prefix[: -len("_panoptic")].replace("openvocab_", "") + instances_meta = MetadataCatalog.get(prefix_instances) + image_root, instances_json = instances_meta.image_root, instances_meta.json_file + + register_coco_panoptic_annos_sem_seg( + prefix, + get_metadata(), + image_root, + os.path.join(root, panoptic_root), + os.path.join(root, panoptic_json), + os.path.join(root, semantic_root), + instances_json, + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_coco_panoptic_annos_sem_seg(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_coco_stuff_164k.py b/mask_adapter/data/datasets/register_coco_stuff_164k.py new file mode 100644 index 0000000000000000000000000000000000000000..4763fe2df2d705a81e82d432a4e448a3f7ddc3eb --- /dev/null +++ b/mask_adapter/data/datasets/register_coco_stuff_164k.py @@ -0,0 +1,63 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/MendelXu/SAN/blob/main/san/data/datasets/register_coco_stuff_164k.py +""" + +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg + +from . import openseg_classes + +COCO_CATEGORIES = openseg_classes.get_coco_stuff_categories_with_prompt_eng() + + +def _get_coco_stuff_meta(): + # Id 0 is reserved for ignore_label, we change ignore_label for 0 + # to 255 in our pre-processing. 
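One idiom that recurs in the registration loops of this and the following register_* files is binding the per-split paths through lambda default arguments (lambda x=image_dir, y=gt_dir: ...). Without the defaults, every closure would see the last loop iteration's paths, because Python closures capture variables rather than values. A minimal sketch of the difference:

# Default-argument binding vs. late binding (sketch only).
bound, late = [], []
for path in ["/data/train", "/data/val"]:
    bound.append(lambda x=path: x)   # captures this iteration's value
    late.append(lambda: path)        # all closures share the final `path`

assert [f() for f in bound] == ["/data/train", "/data/val"]
assert [f() for f in late] == ["/data/val", "/data/val"]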
+ stuff_ids = [k["id"] for k in COCO_CATEGORIES] + assert len(stuff_ids) == 171, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 91], used in models) to ids in the dataset (used for processing results) + stuff_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(stuff_ids)} + stuff_classes = [k["name"] for k in COCO_CATEGORIES] + + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + } + return ret + + +def register_all_coco_stuff_164k(root): + root = os.path.join(root, "coco") + meta = _get_coco_stuff_meta() + + for name, image_dirname, sem_seg_dirname in [ + ("train", "train2017", "stuffthingmaps_detectron2/train2017"), + ("test", "val2017", "stuffthingmaps_detectron2/val2017"), + ]: + image_dir = os.path.join(root, image_dirname) + gt_dir = os.path.join(root, sem_seg_dirname) + all_name = f"openvocab_coco_2017_{name}_stuff_sem_seg" + DatasetCatalog.register( + all_name, + lambda x=image_dir, y=gt_dir: load_sem_seg( + y, x, gt_ext="png", image_ext="jpg" + ), + ) + MetadataCatalog.get(all_name).set( + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + **meta, + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_coco_stuff_164k(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_mapillary_vistas_panoptic.py b/mask_adapter/data/datasets/register_mapillary_vistas_panoptic.py new file mode 100644 index 0000000000000000000000000000000000000000..e89948fdd9e6051ff47215f46374e7981cc1a7ae --- /dev/null +++ b/mask_adapter/data/datasets/register_mapillary_vistas_panoptic.py @@ -0,0 +1,188 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/data/datasets/register_mapillary_vistas_panoptic.py +""" + +import json +import os + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.utils.file_io import PathManager + +from . import openseg_classes + +MAPILLARY_VISTAS_SEM_SEG_CATEGORIES = openseg_classes.get_mapillary_vistas_categories_with_prompt_eng() + +def load_mapillary_vistas_panoptic_json(json_file, image_dir, gt_dir, semseg_dir, meta): + """ + Args: + image_dir (str): path to the raw dataset. e.g., "~/coco/train2017". + gt_dir (str): path to the raw annotations. e.g., "~/coco/panoptic_train2017". + json_file (str): path to the json file. e.g., "~/coco/annotations/panoptic_train2017.json". + Returns: + list[dict]: a list of dicts in Detectron2 standard format. (See + `Using Custom Datasets `_ ) + """ + + def _convert_category_id(segment_info, meta): + if segment_info["category_id"] in meta["thing_dataset_id_to_contiguous_id"]: + segment_info["category_id"] = meta["thing_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = True + else: + segment_info["category_id"] = meta["stuff_dataset_id_to_contiguous_id"][ + segment_info["category_id"] + ] + segment_info["isthing"] = False + return segment_info + + with PathManager.open(json_file) as f: + json_info = json.load(f) + + ret = [] + for ann in json_info["annotations"]: + image_id = ann["image_id"] + # TODO: currently we assume image and label has the same filename but + # different extension, and images have extension ".jpg" for COCO. 
Need + # to make image extension a user-provided argument if we extend this + # function to support other COCO-like datasets. + image_file = os.path.join(image_dir, os.path.splitext(ann["file_name"])[0] + ".jpg") + label_file = os.path.join(gt_dir, ann["file_name"]) + sem_label_file = os.path.join(semseg_dir, ann["file_name"]) + segments_info = [_convert_category_id(x, meta) for x in ann["segments_info"]] + ret.append( + { + "file_name": image_file, + "image_id": image_id, + "pan_seg_file_name": label_file, + "sem_seg_file_name": sem_label_file, + "segments_info": segments_info, + } + ) + assert len(ret), f"No images found in {image_dir}!" + assert PathManager.isfile(ret[0]["file_name"]), ret[0]["file_name"] + assert PathManager.isfile(ret[0]["pan_seg_file_name"]), ret[0]["pan_seg_file_name"] + assert PathManager.isfile(ret[0]["sem_seg_file_name"]), ret[0]["sem_seg_file_name"] + return ret + + +def register_mapillary_vistas_panoptic( + name, metadata, image_root, panoptic_root, semantic_root, panoptic_json, instances_json=None +): + """ + Register a "standard" version of ADE20k panoptic segmentation dataset named `name`. + The dictionaries in this registered dataset follows detectron2's standard format. + Hence it's called "standard". + Args: + name (str): the name that identifies a dataset, + e.g. "ade20k_panoptic_train" + metadata (dict): extra metadata associated with this dataset. + image_root (str): directory which contains all the images + panoptic_root (str): directory which contains panoptic annotation images in COCO format + panoptic_json (str): path to the json panoptic annotation file in COCO format + sem_seg_root (none): not used, to be consistent with + `register_coco_panoptic_separated`. + instances_json (str): path to the json instance annotation file + """ + panoptic_name = name + DatasetCatalog.register( + panoptic_name, + lambda: load_mapillary_vistas_panoptic_json( + panoptic_json, image_root, panoptic_root, semantic_root, metadata + ), + ) + MetadataCatalog.get(panoptic_name).set( + panoptic_root=panoptic_root, + image_root=image_root, + panoptic_json=panoptic_json, + json_file=instances_json, + evaluator_type="mapillary_vistas_panoptic_seg", + ignore_label=65, # different from other datasets, Mapillary Vistas sets ignore_label to 65 + label_divisor=1000, + **metadata, + ) + + +_PREDEFINED_SPLITS_ADE20K_PANOPTIC = { + "openvocab_mapillary_vistas_panoptic_train": ( + "mapillary_vistas/training/images", + "mapillary_vistas/training/panoptic", + "mapillary_vistas/training/panoptic/panoptic_2018.json", + "mapillary_vistas/training/labels", + ), + "openvocab_mapillary_vistas_panoptic_val": ( + "mapillary_vistas/validation/images", + "mapillary_vistas/validation/panoptic", + "mapillary_vistas/validation/panoptic/panoptic_2018.json", + "mapillary_vistas/validation/labels", + ), +} + + +def get_metadata(): + meta = {} + # The following metadata maps contiguous id from [0, #thing categories + + # #stuff categories) to their names and colors. We have to replica of the + # same name and color under "thing_*" and "stuff_*" because the current + # visualization function in D2 handles thing and class classes differently + # due to some heuristic used in Panoptic FPN. We keep the same naming to + # enable reusing existing visualization functions. 
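Worth noting while reading these registrations: the ignore label is dataset-specific, 65 for Mapillary Vistas (set above), 255 for ADE20K-150 and most other splits, and 65535 for splits whose ground truth is stored as 16-bit TIFF (ADE20K-full, Pascal Context 459). A hedged check, assuming the registration modules in this diff have been imported:

# Sketch only: inspect the per-dataset ignore labels attached at registration time.
from detectron2.data import MetadataCatalog

for name in (
    "openvocab_mapillary_vistas_panoptic_val",   # 65
    "openvocab_ade20k_panoptic_val",             # 255
    "openvocab_ade20k_full_sem_seg_val",         # 65535 (16-bit TIFF gt)
):
    print(name, MetadataCatalog.get(name).ignore_label)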
+ thing_classes = [k["name"] for k in MAPILLARY_VISTAS_SEM_SEG_CATEGORIES] + thing_colors = [k["color"] for k in MAPILLARY_VISTAS_SEM_SEG_CATEGORIES] + stuff_classes = [k["name"] for k in MAPILLARY_VISTAS_SEM_SEG_CATEGORIES] + stuff_colors = [k["color"] for k in MAPILLARY_VISTAS_SEM_SEG_CATEGORIES] + + meta["thing_classes"] = thing_classes + meta["thing_colors"] = thing_colors + meta["stuff_classes"] = stuff_classes + meta["stuff_colors"] = stuff_colors + + # Convert category id for training: + # category id: like semantic segmentation, it is the class id for each + # pixel. Since there are some classes not used in evaluation, the category + # id is not always contiguous and thus we have two set of category ids: + # - original category id: category id in the original dataset, mainly + # used for evaluation. + # - contiguous category id: [0, #classes), in order to train the linear + # softmax classifier. + thing_dataset_id_to_contiguous_id = {} + stuff_dataset_id_to_contiguous_id = {} + + for i, cat in enumerate(MAPILLARY_VISTAS_SEM_SEG_CATEGORIES): + if cat["isthing"]: + thing_dataset_id_to_contiguous_id[cat["id"]] = i + # else: + # stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + # in order to use sem_seg evaluator + stuff_dataset_id_to_contiguous_id[cat["id"]] = i + + meta["thing_dataset_id_to_contiguous_id"] = thing_dataset_id_to_contiguous_id + meta["stuff_dataset_id_to_contiguous_id"] = stuff_dataset_id_to_contiguous_id + + return meta + + +def register_all_mapillary_vistas_panoptic(root): + metadata = get_metadata() + for ( + prefix, + (image_root, panoptic_root, panoptic_json, semantic_root), + ) in _PREDEFINED_SPLITS_ADE20K_PANOPTIC.items(): + # The "standard" version of COCO panoptic segmentation dataset, + # e.g. used by Panoptic-DeepLab + register_mapillary_vistas_panoptic( + prefix, + metadata, + os.path.join(root, image_root), + os.path.join(root, panoptic_root), + os.path.join(root, semantic_root), + os.path.join(root, panoptic_json), + ) + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_mapillary_vistas_panoptic(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_objects365.py b/mask_adapter/data/datasets/register_objects365.py new file mode 100644 index 0000000000000000000000000000000000000000..8e8ce99cecb2ed83723d1313f6e4d72bd6b13d95 --- /dev/null +++ b/mask_adapter/data/datasets/register_objects365.py @@ -0,0 +1,409 @@ +from detectron2.data.datasets.register_coco import register_coco_instances +import os + +categories = [ +{'id': 1, 'name': 'person'}, + {'id': 2, 'name': 'sneakers'}, + {'id': 3, 'name': 'chair'}, + {'id': 4, 'name': 'hat'}, + {'id': 5, 'name': 'lamp'}, + {'id': 6, 'name': 'bottle'}, + {'id': 7, 'name': 'cabinet/shelf'}, + {'id': 8, 'name': 'cup'}, + {'id': 9, 'name': 'car'}, + {'id': 10, 'name': 'glasses'}, + {'id': 11, 'name': 'picture/frame'}, + {'id': 12, 'name': 'desk'}, + {'id': 13, 'name': 'handbag'}, + {'id': 14, 'name': 'street lights'}, + {'id': 15, 'name': 'book'}, + {'id': 16, 'name': 'plate'}, + {'id': 17, 'name': 'helmet'}, + {'id': 18, 'name': 'leather shoes'}, + {'id': 19, 'name': 'pillow'}, + {'id': 20, 'name': 'glove'}, + {'id': 21, 'name': 'potted plant'}, + {'id': 22, 'name': 'bracelet'}, + {'id': 23, 'name': 'flower'}, + {'id': 24, 'name': 'tv'}, + {'id': 25, 'name': 'storage box'}, + {'id': 26, 'name': 'vase'}, + {'id': 27, 'name': 'bench'}, + {'id': 28, 'name': 'wine glass'}, + {'id': 29, 'name': 'boots'}, + {'id': 30, 'name': 'bowl'}, + {'id': 31, 'name': 'dining table'}, + 
{'id': 32, 'name': 'umbrella'}, + {'id': 33, 'name': 'boat'}, + {'id': 34, 'name': 'flag'}, + {'id': 35, 'name': 'speaker'}, + {'id': 36, 'name': 'trash bin/can'}, + {'id': 37, 'name': 'stool'}, + {'id': 38, 'name': 'backpack'}, + {'id': 39, 'name': 'couch'}, + {'id': 40, 'name': 'belt'}, + {'id': 41, 'name': 'carpet'}, + {'id': 42, 'name': 'basket'}, + {'id': 43, 'name': 'towel/napkin'}, + {'id': 44, 'name': 'slippers'}, + {'id': 45, 'name': 'barrel/bucket'}, + {'id': 46, 'name': 'coffee table'}, + {'id': 47, 'name': 'suv'}, + {'id': 48, 'name': 'toy'}, + {'id': 49, 'name': 'tie'}, + {'id': 50, 'name': 'bed'}, + {'id': 51, 'name': 'traffic light'}, + {'id': 52, 'name': 'pen/pencil'}, + {'id': 53, 'name': 'microphone'}, + {'id': 54, 'name': 'sandals'}, + {'id': 55, 'name': 'canned'}, + {'id': 56, 'name': 'necklace'}, + {'id': 57, 'name': 'mirror'}, + {'id': 58, 'name': 'faucet'}, + {'id': 59, 'name': 'bicycle'}, + {'id': 60, 'name': 'bread'}, + {'id': 61, 'name': 'high heels'}, + {'id': 62, 'name': 'ring'}, + {'id': 63, 'name': 'van'}, + {'id': 64, 'name': 'watch'}, + {'id': 65, 'name': 'sink'}, + {'id': 66, 'name': 'horse'}, + {'id': 67, 'name': 'fish'}, + {'id': 68, 'name': 'apple'}, + {'id': 69, 'name': 'camera'}, + {'id': 70, 'name': 'candle'}, + {'id': 71, 'name': 'teddy bear'}, + {'id': 72, 'name': 'cake'}, + {'id': 73, 'name': 'motorcycle'}, + {'id': 74, 'name': 'wild bird'}, + {'id': 75, 'name': 'laptop'}, + {'id': 76, 'name': 'knife'}, + {'id': 77, 'name': 'traffic sign'}, + {'id': 78, 'name': 'cell phone'}, + {'id': 79, 'name': 'paddle'}, + {'id': 80, 'name': 'truck'}, + {'id': 81, 'name': 'cow'}, + {'id': 82, 'name': 'power outlet'}, + {'id': 83, 'name': 'clock'}, + {'id': 84, 'name': 'drum'}, + {'id': 85, 'name': 'fork'}, + {'id': 86, 'name': 'bus'}, + {'id': 87, 'name': 'hanger'}, + {'id': 88, 'name': 'nightstand'}, + {'id': 89, 'name': 'pot/pan'}, + {'id': 90, 'name': 'sheep'}, + {'id': 91, 'name': 'guitar'}, + {'id': 92, 'name': 'traffic cone'}, + {'id': 93, 'name': 'tea pot'}, + {'id': 94, 'name': 'keyboard'}, + {'id': 95, 'name': 'tripod'}, + {'id': 96, 'name': 'hockey'}, + {'id': 97, 'name': 'fan'}, + {'id': 98, 'name': 'dog'}, + {'id': 99, 'name': 'spoon'}, + {'id': 100, 'name': 'blackboard/whiteboard'}, + {'id': 101, 'name': 'balloon'}, + {'id': 102, 'name': 'air conditioner'}, + {'id': 103, 'name': 'cymbal'}, + {'id': 104, 'name': 'mouse'}, + {'id': 105, 'name': 'telephone'}, + {'id': 106, 'name': 'pickup truck'}, + {'id': 107, 'name': 'orange'}, + {'id': 108, 'name': 'banana'}, + {'id': 109, 'name': 'airplane'}, + {'id': 110, 'name': 'luggage'}, + {'id': 111, 'name': 'skis'}, + {'id': 112, 'name': 'soccer'}, + {'id': 113, 'name': 'trolley'}, + {'id': 114, 'name': 'oven'}, + {'id': 115, 'name': 'remote'}, + {'id': 116, 'name': 'baseball glove'}, + {'id': 117, 'name': 'paper towel'}, + {'id': 118, 'name': 'refrigerator'}, + {'id': 119, 'name': 'train'}, + {'id': 120, 'name': 'tomato'}, + {'id': 121, 'name': 'machinery vehicle'}, + {'id': 122, 'name': 'tent'}, + {'id': 123, 'name': 'shampoo/shower gel'}, + {'id': 124, 'name': 'head phone'}, + {'id': 125, 'name': 'lantern'}, + {'id': 126, 'name': 'donut'}, + {'id': 127, 'name': 'cleaning products'}, + {'id': 128, 'name': 'sailboat'}, + {'id': 129, 'name': 'tangerine'}, + {'id': 130, 'name': 'pizza'}, + {'id': 131, 'name': 'kite'}, + {'id': 132, 'name': 'computer box'}, + {'id': 133, 'name': 'elephant'}, + {'id': 134, 'name': 'toiletries'}, + {'id': 135, 'name': 'gas stove'}, + {'id': 136, 'name': 'broccoli'}, + {'id': 
137, 'name': 'toilet'}, + {'id': 138, 'name': 'stroller'}, + {'id': 139, 'name': 'shovel'}, + {'id': 140, 'name': 'baseball bat'}, + {'id': 141, 'name': 'microwave'}, + {'id': 142, 'name': 'skateboard'}, + {'id': 143, 'name': 'surfboard'}, + {'id': 144, 'name': 'surveillance camera'}, + {'id': 145, 'name': 'gun'}, + {'id': 146, 'name': 'life saver'}, + {'id': 147, 'name': 'cat'}, + {'id': 148, 'name': 'lemon'}, + {'id': 149, 'name': 'liquid soap'}, + {'id': 150, 'name': 'zebra'}, + {'id': 151, 'name': 'duck'}, + {'id': 152, 'name': 'sports car'}, + {'id': 153, 'name': 'giraffe'}, + {'id': 154, 'name': 'pumpkin'}, + {'id': 155, 'name': 'piano'}, + {'id': 156, 'name': 'stop sign'}, + {'id': 157, 'name': 'radiator'}, + {'id': 158, 'name': 'converter'}, + {'id': 159, 'name': 'tissue '}, + {'id': 160, 'name': 'carrot'}, + {'id': 161, 'name': 'washing machine'}, + {'id': 162, 'name': 'vent'}, + {'id': 163, 'name': 'cookies'}, + {'id': 164, 'name': 'cutting/chopping board'}, + {'id': 165, 'name': 'tennis racket'}, + {'id': 166, 'name': 'candy'}, + {'id': 167, 'name': 'skating and skiing shoes'}, + {'id': 168, 'name': 'scissors'}, + {'id': 169, 'name': 'folder'}, + {'id': 170, 'name': 'baseball'}, + {'id': 171, 'name': 'strawberry'}, + {'id': 172, 'name': 'bow tie'}, + {'id': 173, 'name': 'pigeon'}, + {'id': 174, 'name': 'pepper'}, + {'id': 175, 'name': 'coffee machine'}, + {'id': 176, 'name': 'bathtub'}, + {'id': 177, 'name': 'snowboard'}, + {'id': 178, 'name': 'suitcase'}, + {'id': 179, 'name': 'grapes'}, + {'id': 180, 'name': 'ladder'}, + {'id': 181, 'name': 'pear'}, + {'id': 182, 'name': 'american football'}, + {'id': 183, 'name': 'basketball'}, + {'id': 184, 'name': 'potato'}, + {'id': 185, 'name': 'paint brush'}, + {'id': 186, 'name': 'printer'}, + {'id': 187, 'name': 'billiards'}, + {'id': 188, 'name': 'fire hydrant'}, + {'id': 189, 'name': 'goose'}, + {'id': 190, 'name': 'projector'}, + {'id': 191, 'name': 'sausage'}, + {'id': 192, 'name': 'fire extinguisher'}, + {'id': 193, 'name': 'extension cord'}, + {'id': 194, 'name': 'facial mask'}, + {'id': 195, 'name': 'tennis ball'}, + {'id': 196, 'name': 'chopsticks'}, + {'id': 197, 'name': 'electronic stove and gas stove'}, + {'id': 198, 'name': 'pie'}, + {'id': 199, 'name': 'frisbee'}, + {'id': 200, 'name': 'kettle'}, + {'id': 201, 'name': 'hamburger'}, + {'id': 202, 'name': 'golf club'}, + {'id': 203, 'name': 'cucumber'}, + {'id': 204, 'name': 'clutch'}, + {'id': 205, 'name': 'blender'}, + {'id': 206, 'name': 'tong'}, + {'id': 207, 'name': 'slide'}, + {'id': 208, 'name': 'hot dog'}, + {'id': 209, 'name': 'toothbrush'}, + {'id': 210, 'name': 'facial cleanser'}, + {'id': 211, 'name': 'mango'}, + {'id': 212, 'name': 'deer'}, + {'id': 213, 'name': 'egg'}, + {'id': 214, 'name': 'violin'}, + {'id': 215, 'name': 'marker'}, + {'id': 216, 'name': 'ship'}, + {'id': 217, 'name': 'chicken'}, + {'id': 218, 'name': 'onion'}, + {'id': 219, 'name': 'ice cream'}, + {'id': 220, 'name': 'tape'}, + {'id': 221, 'name': 'wheelchair'}, + {'id': 222, 'name': 'plum'}, + {'id': 223, 'name': 'bar soap'}, + {'id': 224, 'name': 'scale'}, + {'id': 225, 'name': 'watermelon'}, + {'id': 226, 'name': 'cabbage'}, + {'id': 227, 'name': 'router/modem'}, + {'id': 228, 'name': 'golf ball'}, + {'id': 229, 'name': 'pine apple'}, + {'id': 230, 'name': 'crane'}, + {'id': 231, 'name': 'fire truck'}, + {'id': 232, 'name': 'peach'}, + {'id': 233, 'name': 'cello'}, + {'id': 234, 'name': 'notepaper'}, + {'id': 235, 'name': 'tricycle'}, + {'id': 236, 'name': 'toaster'}, + {'id': 237, 'name': 
'helicopter'}, + {'id': 238, 'name': 'green beans'}, + {'id': 239, 'name': 'brush'}, + {'id': 240, 'name': 'carriage'}, + {'id': 241, 'name': 'cigar'}, + {'id': 242, 'name': 'earphone'}, + {'id': 243, 'name': 'penguin'}, + {'id': 244, 'name': 'hurdle'}, + {'id': 245, 'name': 'swing'}, + {'id': 246, 'name': 'radio'}, + {'id': 247, 'name': 'CD'}, + {'id': 248, 'name': 'parking meter'}, + {'id': 249, 'name': 'swan'}, + {'id': 250, 'name': 'garlic'}, + {'id': 251, 'name': 'french fries'}, + {'id': 252, 'name': 'horn'}, + {'id': 253, 'name': 'avocado'}, + {'id': 254, 'name': 'saxophone'}, + {'id': 255, 'name': 'trumpet'}, + {'id': 256, 'name': 'sandwich'}, + {'id': 257, 'name': 'cue'}, + {'id': 258, 'name': 'kiwi fruit'}, + {'id': 259, 'name': 'bear'}, + {'id': 260, 'name': 'fishing rod'}, + {'id': 261, 'name': 'cherry'}, + {'id': 262, 'name': 'tablet'}, + {'id': 263, 'name': 'green vegetables'}, + {'id': 264, 'name': 'nuts'}, + {'id': 265, 'name': 'corn'}, + {'id': 266, 'name': 'key'}, + {'id': 267, 'name': 'screwdriver'}, + {'id': 268, 'name': 'globe'}, + {'id': 269, 'name': 'broom'}, + {'id': 270, 'name': 'pliers'}, + {'id': 271, 'name': 'volleyball'}, + {'id': 272, 'name': 'hammer'}, + {'id': 273, 'name': 'eggplant'}, + {'id': 274, 'name': 'trophy'}, + {'id': 275, 'name': 'dates'}, + {'id': 276, 'name': 'board eraser'}, + {'id': 277, 'name': 'rice'}, + {'id': 278, 'name': 'tape measure/ruler'}, + {'id': 279, 'name': 'dumbbell'}, + {'id': 280, 'name': 'hamimelon'}, + {'id': 281, 'name': 'stapler'}, + {'id': 282, 'name': 'camel'}, + {'id': 283, 'name': 'lettuce'}, + {'id': 284, 'name': 'goldfish'}, + {'id': 285, 'name': 'meat balls'}, + {'id': 286, 'name': 'medal'}, + {'id': 287, 'name': 'toothpaste'}, + {'id': 288, 'name': 'antelope'}, + {'id': 289, 'name': 'shrimp'}, + {'id': 290, 'name': 'rickshaw'}, + {'id': 291, 'name': 'trombone'}, + {'id': 292, 'name': 'pomegranate'}, + {'id': 293, 'name': 'coconut'}, + {'id': 294, 'name': 'jellyfish'}, + {'id': 295, 'name': 'mushroom'}, + {'id': 296, 'name': 'calculator'}, + {'id': 297, 'name': 'treadmill'}, + {'id': 298, 'name': 'butterfly'}, + {'id': 299, 'name': 'egg tart'}, + {'id': 300, 'name': 'cheese'}, + {'id': 301, 'name': 'pig'}, + {'id': 302, 'name': 'pomelo'}, + {'id': 303, 'name': 'race car'}, + {'id': 304, 'name': 'rice cooker'}, + {'id': 305, 'name': 'tuba'}, + {'id': 306, 'name': 'crosswalk sign'}, + {'id': 307, 'name': 'papaya'}, + {'id': 308, 'name': 'hair drier'}, + {'id': 309, 'name': 'green onion'}, + {'id': 310, 'name': 'chips'}, + {'id': 311, 'name': 'dolphin'}, + {'id': 312, 'name': 'sushi'}, + {'id': 313, 'name': 'urinal'}, + {'id': 314, 'name': 'donkey'}, + {'id': 315, 'name': 'electric drill'}, + {'id': 316, 'name': 'spring rolls'}, + {'id': 317, 'name': 'tortoise/turtle'}, + {'id': 318, 'name': 'parrot'}, + {'id': 319, 'name': 'flute'}, + {'id': 320, 'name': 'measuring cup'}, + {'id': 321, 'name': 'shark'}, + {'id': 322, 'name': 'steak'}, + {'id': 323, 'name': 'poker card'}, + {'id': 324, 'name': 'binoculars'}, + {'id': 325, 'name': 'llama'}, + {'id': 326, 'name': 'radish'}, + {'id': 327, 'name': 'noodles'}, + {'id': 328, 'name': 'yak'}, + {'id': 329, 'name': 'mop'}, + {'id': 330, 'name': 'crab'}, + {'id': 331, 'name': 'microscope'}, + {'id': 332, 'name': 'barbell'}, + {'id': 333, 'name': 'bread/bun'}, + {'id': 334, 'name': 'baozi'}, + {'id': 335, 'name': 'lion'}, + {'id': 336, 'name': 'red cabbage'}, + {'id': 337, 'name': 'polar bear'}, + {'id': 338, 'name': 'lighter'}, + {'id': 339, 'name': 'seal'}, + {'id': 340, 'name': 
'mangosteen'}, + {'id': 341, 'name': 'comb'}, + {'id': 342, 'name': 'eraser'}, + {'id': 343, 'name': 'pitaya'}, + {'id': 344, 'name': 'scallop'}, + {'id': 345, 'name': 'pencil case'}, + {'id': 346, 'name': 'saw'}, + {'id': 347, 'name': 'table tennis paddle'}, + {'id': 348, 'name': 'okra'}, + {'id': 349, 'name': 'starfish'}, + {'id': 350, 'name': 'eagle'}, + {'id': 351, 'name': 'monkey'}, + {'id': 352, 'name': 'durian'}, + {'id': 353, 'name': 'game board'}, + {'id': 354, 'name': 'rabbit'}, + {'id': 355, 'name': 'french horn'}, + {'id': 356, 'name': 'ambulance'}, + {'id': 357, 'name': 'asparagus'}, + {'id': 358, 'name': 'hoverboard'}, + {'id': 359, 'name': 'pasta'}, + {'id': 360, 'name': 'target'}, + {'id': 361, 'name': 'hotair balloon'}, + {'id': 362, 'name': 'chainsaw'}, + {'id': 363, 'name': 'lobster'}, + {'id': 364, 'name': 'iron'}, + {'id': 365, 'name': 'flashlight'}] + +def _get_builtin_metadata_obj365v1(): + id_to_name = {x['id']: x['name'] for x in categories} + thing_dataset_id_to_contiguous_id = {i + 1: i for i in range(365)} + thing_classes = [id_to_name[k] for k in sorted(id_to_name)] + return { + "thing_dataset_id_to_contiguous_id": thing_dataset_id_to_contiguous_id, + "thing_classes": thing_classes} + +_PREDEFINED_SPLITS_OBJECTS365 = { + "objects365_train": ("objects365/train", "objects365/annotations/objects365_train.json"), + "objects365_val": ("objects365/val", "objects365/annotations/objects365_val.json"), +} + +# for key, (image_root, json_file) in _PREDEFINED_SPLITS_OBJECTS365.items(): +# register_coco_instances( +# key, +# _get_builtin_metadata(), +# os.path.join("datasets", json_file) if "://" not in json_file else json_file, +# os.path.join("datasets", image_root), +# ) + +_PREDEFINED_SPLITS_OBJECTS365V1 = { + "objects365_v1_train": ("Objects365v1/train", "Objects365v1/objects365_train.json"), + "objects365_v1_masktrain": ("Objects365v1/train", "Objects365v1/annotations/filtered_objects365_v1_train_with_mask.json"), + "objects365_v1_val": ("Objects365v1/val/val", "Objects365v1/objects365_val.json"), + "objects365_v1_val_mini": ("Objects365v1/val/val", "Objects365v1/objects365_val_mini.json"), +} + +def register_all_obj365v1(root): + for key, (image_root, json_file) in _PREDEFINED_SPLITS_OBJECTS365V1.items(): + register_coco_instances( + key, + _get_builtin_metadata_obj365v1(), + os.path.join(root, json_file) if "://" not in json_file else json_file, + os.path.join(root, image_root), + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_obj365v1(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_pascal_ctx_459_sem_seg.py b/mask_adapter/data/datasets/register_pascal_ctx_459_sem_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..a6f2d2132d4ceec003c75e2f0c9ae6682fbcb315 --- /dev/null +++ b/mask_adapter/data/datasets/register_pascal_ctx_459_sem_seg.py @@ -0,0 +1,81 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
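The Objects365 registration above delegates entirely to detectron2's register_coco_instances helper; the sketch below restates that pattern with toy metadata and hypothetical paths (nothing in it is part of this diff):

# Toy example of the register_coco_instances pattern used above.
import os
from detectron2.data.datasets import register_coco_instances

toy_meta = {
    "thing_dataset_id_to_contiguous_id": {1: 0, 2: 1},   # dataset id -> [0, #classes)
    "thing_classes": ["person", "sneakers"],
}
root = os.getenv("DETECTRON2_DATASETS", "datasets")
register_coco_instances(
    "toy_objects365_val",                         # name placed in the catalogs
    toy_meta,                                     # extra metadata
    os.path.join(root, "toy/annotations.json"),   # COCO-format json (hypothetical path)
    os.path.join(root, "toy/images"),             # image root (hypothetical path)
)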
+""" + +import os + +import numpy as np + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg + +from . import openseg_classes + +PASCAL_CTX_459_CATEGORIES=openseg_classes.get_pascal_ctx_459_categories_with_prompt_eng() + +PASCAL_CTX_459_COLORS = [k["color"] for k in PASCAL_CTX_459_CATEGORIES] + +MetadataCatalog.get("openvocab_pascal_ctx459_sem_seg_train").set( + stuff_colors=PASCAL_CTX_459_COLORS[:], +) + +MetadataCatalog.get("openvocab_pascal_ctx459_sem_seg_val").set( + stuff_colors=PASCAL_CTX_459_COLORS[:], +) + +def _get_ctx459_meta(): + # Id 0 is reserved for ignore_label, we change ignore_label for 0 + # to 255 in our pre-processing, so all ids are shifted by 1. + stuff_ids = [k["id"] for k in PASCAL_CTX_459_CATEGORIES] + assert len(stuff_ids) == 459, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 91], used in models) to ids in the dataset (used for processing results) + stuff_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(stuff_ids)} + stuff_classes = [k["name"] for k in PASCAL_CTX_459_CATEGORIES] + + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + } + return ret + + +def register_all_ctx459(root): + root = os.path.join(root, "pascal_ctx_d2") + meta = _get_ctx459_meta() + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_ctx459", dirname) + name = f"openvocab_pascal_ctx459_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="tif", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=meta["stuff_classes"][:], + thing_dataset_id_to_contiguous_id={}, # to make Mask2Former happy + stuff_dataset_id_to_contiguous_id=meta["stuff_dataset_id_to_contiguous_id"], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=65535, # NOTE: gt is saved in 16-bit TIFF images + gt_ext="tif", + ) + + + + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_ctx459(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_pascal_ctx_59_sem_seg.py b/mask_adapter/data/datasets/register_pascal_ctx_59_sem_seg.py new file mode 100644 index 0000000000000000000000000000000000000000..6f3bfbf0b9ce94677caaed93c6a4cccf7c38d8c8 --- /dev/null +++ b/mask_adapter/data/datasets/register_pascal_ctx_59_sem_seg.py @@ -0,0 +1,78 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import os + +import numpy as np + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg + +from . 
import openseg_classes + +PASCAL_CTX_59_CATEGORIES=openseg_classes.get_pascal_ctx_59_categories_with_prompt_eng() + +PASCAL_CTX_59_COLORS = [k["color"] for k in PASCAL_CTX_59_CATEGORIES] + +MetadataCatalog.get("openvocab_pascal_ctx59_sem_seg_train").set( + stuff_colors=PASCAL_CTX_59_COLORS[:], +) + +MetadataCatalog.get("openvocab_pascal_ctx59_sem_seg_val").set( + stuff_colors=PASCAL_CTX_59_COLORS[:], +) + +def _get_ctx59_meta(): + # Id 0 is reserved for ignore_label, we change ignore_label for 0 + # to 255 in our pre-processing, so all ids are shifted by 1. + stuff_ids = [k["id"] for k in PASCAL_CTX_59_CATEGORIES] + assert len(stuff_ids) == 59, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 91], used in models) to ids in the dataset (used for processing results) + stuff_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(stuff_ids)} + stuff_classes = [k["name"] for k in PASCAL_CTX_59_CATEGORIES] + + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + } + return ret + + +def register_all_ctx59(root): + root = os.path.join(root, "pascal_ctx_d2") + meta = _get_ctx59_meta() + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_ctx59", dirname) + name = f"openvocab_pascal_ctx59_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=meta["stuff_classes"][:], + thing_dataset_id_to_contiguous_id={}, # to make Mask2Former happy + stuff_dataset_id_to_contiguous_id=meta["stuff_dataset_id_to_contiguous_id"], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + gt_ext="png", + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_ctx59(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_pascal_voc_20_semantic.py b/mask_adapter/data/datasets/register_pascal_voc_20_semantic.py new file mode 100644 index 0000000000000000000000000000000000000000..2e8d24954a4523a0fa26fb2c6bccbccef0583a89 --- /dev/null +++ b/mask_adapter/data/datasets/register_pascal_voc_20_semantic.py @@ -0,0 +1,80 @@ +# Copyright (c) Facebook, Inc. and its affiliates. +import os + +import numpy as np + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg + +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from . 
import openseg_classes + +PASCAL_VOC_20_CATEGORIES = openseg_classes.get_pascal_21_categories_with_prompt_eng()[1:] # remove background + +PASCAL_VOC_20_COLORS = [k["color"] for k in PASCAL_VOC_20_CATEGORIES] + +MetadataCatalog.get("openvocab_pascal20_sem_seg_train").set( + stuff_colors=PASCAL_VOC_20_COLORS[:], +) + +MetadataCatalog.get("openvocab_pascal20_sem_seg_val").set( + stuff_colors=PASCAL_VOC_20_COLORS[:], +) + + +def _get_pascal20_meta(): + # Id 0 is reserved for ignore_label, we change ignore_label for 0 + # to 255 in our pre-processing, so all ids are shifted by 1. + stuff_ids = [k["id"] for k in PASCAL_VOC_20_CATEGORIES] + assert len(stuff_ids) == 20, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 91], used in models) to ids in the dataset (used for processing results) + stuff_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(stuff_ids)} + stuff_classes = [k["name"] for k in PASCAL_VOC_20_CATEGORIES] + + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + } + return ret + + +def register_all_pascal20(root): + root = os.path.join(root, "pascal_voc_d2") + meta = _get_pascal20_meta() + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_pascal20", dirname) + name = f"openvocab_pascal20_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=meta["stuff_classes"][:], + thing_dataset_id_to_contiguous_id={}, # to make Mask2Former happy + stuff_dataset_id_to_contiguous_id=meta["stuff_dataset_id_to_contiguous_id"], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + gt_ext="png", + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_pascal20(_root) \ No newline at end of file diff --git a/mask_adapter/data/datasets/register_pascal_voc_21_semantic.py b/mask_adapter/data/datasets/register_pascal_voc_21_semantic.py new file mode 100644 index 0000000000000000000000000000000000000000..b059b654956f47942252b2bb57f78878ed487343 --- /dev/null +++ b/mask_adapter/data/datasets/register_pascal_voc_21_semantic.py @@ -0,0 +1,79 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import os + +import numpy as np + +from detectron2.data import DatasetCatalog, MetadataCatalog +from detectron2.data.datasets import load_sem_seg + +from . 
import openseg_classes + +PASCAL_VOC_21_CATEGORIES = openseg_classes.get_pascal_21_categories_with_prompt_eng() + +PASCAL_VOC_21_COLORS = [k["color"] for k in PASCAL_VOC_21_CATEGORIES] + +MetadataCatalog.get("openvocab_pascal21_sem_seg_train").set( + stuff_colors=PASCAL_VOC_21_COLORS[:], +) + +MetadataCatalog.get("openvocab_pascal21_sem_seg_val").set( + stuff_colors=PASCAL_VOC_21_COLORS[:], +) + + +def _get_pascal21_meta(): + # Id 0 is reserved for ignore_label, we change ignore_label for 0 + # to 255 in our pre-processing, so all ids are shifted by 1. + stuff_ids = [k["id"] for k in PASCAL_VOC_21_CATEGORIES] + assert len(stuff_ids) == 21, len(stuff_ids) + + # For semantic segmentation, this mapping maps from contiguous stuff id + # (in [0, 91], used in models) to ids in the dataset (used for processing results) + stuff_dataset_id_to_contiguous_id = {k: i for i, k in enumerate(stuff_ids)} + stuff_classes = [k["name"] for k in PASCAL_VOC_21_CATEGORIES] + + ret = { + "stuff_dataset_id_to_contiguous_id": stuff_dataset_id_to_contiguous_id, + "stuff_classes": stuff_classes, + } + return ret + + +def register_all_pascal21(root): + root = os.path.join(root, "pascal_voc_d2") + meta = _get_pascal21_meta() + for name, dirname in [("train", "training"), ("val", "validation")]: + image_dir = os.path.join(root, "images", dirname) + gt_dir = os.path.join(root, "annotations_pascal21", dirname) + name = f"openvocab_pascal21_sem_seg_{name}" + DatasetCatalog.register( + name, lambda x=image_dir, y=gt_dir: load_sem_seg(y, x, gt_ext="png", image_ext="jpg") + ) + MetadataCatalog.get(name).set( + stuff_classes=meta["stuff_classes"][:], + thing_dataset_id_to_contiguous_id={}, # to make Mask2Former happy + stuff_dataset_id_to_contiguous_id=meta["stuff_dataset_id_to_contiguous_id"], + image_root=image_dir, + sem_seg_root=gt_dir, + evaluator_type="sem_seg", + ignore_label=255, + gt_ext="png", + ) + +_root = os.getenv("DETECTRON2_DATASETS", "datasets") +register_all_pascal21(_root) \ No newline at end of file diff --git a/mask_adapter/evaluation/__init__.py b/mask_adapter/evaluation/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be75f0cd9568f901b3174ecfb43c0b9f4fa1f77d --- /dev/null +++ b/mask_adapter/evaluation/__init__.py @@ -0,0 +1,15 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
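A small usage sketch (not part of the diff) of the dataset registrations above, assuming detectron2 and this repo's `mask_adapter` package are importable. Importing a register module is what populates the catalogs; no image files are needed just to read the metadata.

from detectron2.data import MetadataCatalog

# Importing the module runs register_all_ctx459() at the bottom of that file.
import mask_adapter.data.datasets.register_pascal_ctx_459_sem_seg  # noqa: F401

meta = MetadataCatalog.get("openvocab_pascal_ctx459_sem_seg_val")
print(len(meta.stuff_classes))   # 459 class-name strings (with prompt-engineered synonyms)
print(meta.ignore_label)         # 65535, since the ground truth is stored as 16-bit TIFF
print(meta.evaluator_type)       # 'sem_seg'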
+""" \ No newline at end of file diff --git a/mask_adapter/evaluation/__pycache__/__init__.cpython-310.pyc b/mask_adapter/evaluation/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8296131041a43a81a30c0e0d11f34ac6b219a745 Binary files /dev/null and b/mask_adapter/evaluation/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/evaluation/__pycache__/__init__.cpython-38.pyc b/mask_adapter/evaluation/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e2dd3bb089a8ab0786e664ceba24120832f25f5 Binary files /dev/null and b/mask_adapter/evaluation/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/evaluation/__pycache__/coco_panoptic_evaluation.cpython-310.pyc b/mask_adapter/evaluation/__pycache__/coco_panoptic_evaluation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..573614979797c48e4f853b4e9eb46e830924d737 Binary files /dev/null and b/mask_adapter/evaluation/__pycache__/coco_panoptic_evaluation.cpython-310.pyc differ diff --git a/mask_adapter/evaluation/__pycache__/coco_panoptic_evaluation.cpython-38.pyc b/mask_adapter/evaluation/__pycache__/coco_panoptic_evaluation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..87c42d88f3843880910926df7d7ec690653bfc33 Binary files /dev/null and b/mask_adapter/evaluation/__pycache__/coco_panoptic_evaluation.cpython-38.pyc differ diff --git a/mask_adapter/evaluation/__pycache__/instance_evaluation.cpython-310.pyc b/mask_adapter/evaluation/__pycache__/instance_evaluation.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a581a5805f2c37af74dadf1eeaeb3b72832c7b8e Binary files /dev/null and b/mask_adapter/evaluation/__pycache__/instance_evaluation.cpython-310.pyc differ diff --git a/mask_adapter/evaluation/__pycache__/instance_evaluation.cpython-38.pyc b/mask_adapter/evaluation/__pycache__/instance_evaluation.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aaa2cdb464bc34bb90a8e58021b3c28448b98dbb Binary files /dev/null and b/mask_adapter/evaluation/__pycache__/instance_evaluation.cpython-38.pyc differ diff --git a/mask_adapter/evaluation/instance_evaluation.py b/mask_adapter/evaluation/instance_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..738c08363a26f540bd850125bdb93c6372640bf1 --- /dev/null +++ b/mask_adapter/evaluation/instance_evaluation.py @@ -0,0 +1,113 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. 
+ +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/evaluation/instance_evaluation.py +""" + +import contextlib +import copy +import io +import itertools +import json +import logging +import numpy as np +import os +import pickle +from collections import OrderedDict +import pycocotools.mask as mask_util +import torch +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from tabulate import tabulate + +import detectron2.utils.comm as comm +from detectron2.config import CfgNode +from detectron2.data import MetadataCatalog +from detectron2.data.datasets.coco import convert_to_coco_json +from detectron2.evaluation.coco_evaluation import COCOEvaluator, _evaluate_predictions_on_coco +from detectron2.evaluation.fast_eval_api import COCOeval_opt +from detectron2.structures import Boxes, BoxMode, pairwise_iou +from detectron2.utils.file_io import PathManager +from detectron2.utils.logger import create_small_table + + +# modified from COCOEvaluator for instance segmetnat +class InstanceSegEvaluator(COCOEvaluator): + """ + Evaluate AR for object proposals, AP for instance detection/segmentation, AP + for keypoint detection outputs using COCO's metrics. + See http://cocodataset.org/#detection-eval and + http://cocodataset.org/#keypoints-eval to understand its metrics. + The metrics range from 0 to 100 (instead of 0 to 1), where a -1 or NaN means + the metric cannot be computed (e.g. due to no predictions made). + + In addition to COCO, this evaluator is able to support any bounding box detection, + instance segmentation, or keypoint detection dataset. + """ + + def _eval_predictions(self, predictions, img_ids=None): + """ + Evaluate predictions. Fill self._results with the metrics of the tasks. + """ + self._logger.info("Preparing results for COCO format ...") + coco_results = list(itertools.chain(*[x["instances"] for x in predictions])) + tasks = self._tasks or self._tasks_from_predictions(coco_results) + + # unmap the category ids for COCO + if hasattr(self._metadata, "thing_dataset_id_to_contiguous_id"): + dataset_id_to_contiguous_id = self._metadata.thing_dataset_id_to_contiguous_id + # all_contiguous_ids = list(dataset_id_to_contiguous_id.values()) + # num_classes = len(all_contiguous_ids) + # assert min(all_contiguous_ids) == 0 and max(all_contiguous_ids) == num_classes - 1 + + reverse_id_mapping = {v: k for k, v in dataset_id_to_contiguous_id.items()} + for result in coco_results: + category_id = result["category_id"] + # assert category_id < num_classes, ( + # f"A prediction has class={category_id}, " + # f"but the dataset only has {num_classes} classes and " + # f"predicted class id should be in [0, {num_classes - 1}]." + # ) + assert category_id in reverse_id_mapping, ( + f"A prediction has class={category_id}, " + f"but the dataset only has class ids in {dataset_id_to_contiguous_id}." 
+ ) + result["category_id"] = reverse_id_mapping[category_id] + + if self._output_dir: + file_path = os.path.join(self._output_dir, "coco_instances_results.json") + self._logger.info("Saving results to {}".format(file_path)) + with PathManager.open(file_path, "w") as f: + f.write(json.dumps(coco_results)) + f.flush() + + if not self._do_evaluation: + self._logger.info("Annotations are not available for evaluation.") + return + + self._logger.info( + "Evaluating predictions with {} COCO API...".format( + "unofficial" if self._use_fast_impl else "official" + ) + ) + for task in sorted(tasks): + assert task in {"bbox", "segm", "keypoints"}, f"Got unknown task: {task}!" + coco_eval = ( + _evaluate_predictions_on_coco( + self._coco_api, + coco_results, + task, + kpt_oks_sigmas=self._kpt_oks_sigmas, + #use_fast_impl=self._use_fast_impl, + img_ids=img_ids, + max_dets_per_image=self._max_dets_per_image, + ) + if len(coco_results) > 0 + else None # cocoapi does not handle empty results very well + ) + + res = self._derive_coco_results( + coco_eval, task, class_names=self._metadata.get("thing_classes") + ) + self._results[task] = res diff --git a/mask_adapter/evaluation/panoptic_evaluation.py b/mask_adapter/evaluation/panoptic_evaluation.py new file mode 100644 index 0000000000000000000000000000000000000000..ff3787d230e63edafce5013891c6873aa5d4ce90 --- /dev/null +++ b/mask_adapter/evaluation/panoptic_evaluation.py @@ -0,0 +1,274 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/cocodataset/panopticapi/blob/master/panopticapi/evaluation.py +Reference: https://github.com/open-mmlab/mmdetection/pull/7538 +""" + +#!/usr/bin/env python +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals +import os, sys +import numpy as np +import json +import time +from datetime import timedelta +from collections import defaultdict +import argparse +import multiprocessing + +import PIL.Image as Image + +from panopticapi.utils import get_traceback, rgb2id + +OFFSET = 256 * 256 * 256 +VOID = 0 + +class PQStatCat(): + def __init__(self): + self.iou = 0.0 + self.tp = 0 + self.fp = 0 + self.fn = 0 + + def __iadd__(self, pq_stat_cat): + self.iou += pq_stat_cat.iou + self.tp += pq_stat_cat.tp + self.fp += pq_stat_cat.fp + self.fn += pq_stat_cat.fn + return self + + +class PQStat(): + def __init__(self): + self.pq_per_cat = defaultdict(PQStatCat) + + def __getitem__(self, i): + return self.pq_per_cat[i] + + def __iadd__(self, pq_stat): + for label, pq_stat_cat in pq_stat.pq_per_cat.items(): + self.pq_per_cat[label] += pq_stat_cat + return self + + def pq_average(self, categories, isthing): + pq, sq, rq, n = 0, 0, 0, 0 + per_class_results = {} + for label, label_info in categories.items(): + if isthing is not None: + cat_isthing = label_info['isthing'] == 1 + if isthing != cat_isthing: + continue + iou = self.pq_per_cat[label].iou + tp = self.pq_per_cat[label].tp + fp = self.pq_per_cat[label].fp + fn = self.pq_per_cat[label].fn + if tp + fp + fn == 0: + per_class_results[label] = {'pq': 0.0, 'sq': 0.0, 'rq': 0.0} + continue + n += 1 + pq_class = iou / (tp + 0.5 * fp + 0.5 * fn) + sq_class = iou / tp if tp != 0 else 0 + rq_class = tp / (tp + 0.5 * fp + 0.5 * fn) + per_class_results[label] = {'pq': pq_class, 'sq': sq_class, 'rq': rq_class} + pq += 
pq_class + sq += sq_class + rq += rq_class + + return {'pq': pq / n, 'sq': sq / n, 'rq': rq / n, 'n': n}, per_class_results + + +@get_traceback +def pq_compute_single_core(proc_id, annotation_set, gt_folder, pred_folder, categories): + pq_stat = PQStat() + + idx = 0 + for gt_ann, pred_ann in annotation_set: + if idx % 100 == 0: + print('Core: {}, {} from {} images processed'.format(proc_id, idx, len(annotation_set))) + idx += 1 + + pan_gt = np.array(Image.open(os.path.join(gt_folder, gt_ann['file_name'])), dtype=np.uint32) + pan_gt = rgb2id(pan_gt) + pan_pred = np.array(Image.open(os.path.join(pred_folder, pred_ann['file_name'])), dtype=np.uint32) + pan_pred = rgb2id(pan_pred) + + gt_segms = {el['id']: el for el in gt_ann['segments_info']} + pred_segms = {el['id']: el for el in pred_ann['segments_info']} + + # predicted segments area calculation + prediction sanity checks + pred_labels_set = set(el['id'] for el in pred_ann['segments_info']) + labels, labels_cnt = np.unique(pan_pred, return_counts=True) + for label, label_cnt in zip(labels, labels_cnt): + if label not in pred_segms: + if label == VOID: + continue + raise KeyError('In the image with ID {} segment with ID {} is presented in PNG and not presented in JSON.'.format(gt_ann['image_id'], label)) + pred_segms[label]['area'] = label_cnt + pred_labels_set.remove(label) + if pred_segms[label]['category_id'] not in categories: + raise KeyError('In the image with ID {} segment with ID {} has unknown category_id {}.'.format(gt_ann['image_id'], label, pred_segms[label]['category_id'])) + if len(pred_labels_set) != 0: + raise KeyError('In the image with ID {} the following segment IDs {} are presented in JSON and not presented in PNG.'.format(gt_ann['image_id'], list(pred_labels_set))) + + # confusion matrix calculation + pan_gt_pred = pan_gt.astype(np.uint64) * OFFSET + pan_pred.astype(np.uint64) + gt_pred_map = {} + labels, labels_cnt = np.unique(pan_gt_pred, return_counts=True) + for label, intersection in zip(labels, labels_cnt): + gt_id = label // OFFSET + pred_id = label % OFFSET + gt_pred_map[(gt_id, pred_id)] = intersection + + # count all matched pairs + gt_matched = set() + pred_matched = set() + for label_tuple, intersection in gt_pred_map.items(): + gt_label, pred_label = label_tuple + if gt_label not in gt_segms: + continue + if pred_label not in pred_segms: + continue + if gt_segms[gt_label]['iscrowd'] == 1: + continue + if gt_segms[gt_label]['category_id'] != pred_segms[pred_label]['category_id']: + continue + + union = pred_segms[pred_label]['area'] + gt_segms[gt_label]['area'] - intersection - gt_pred_map.get((VOID, pred_label), 0) + iou = intersection / union + if iou > 0.5: + pq_stat[gt_segms[gt_label]['category_id']].tp += 1 + pq_stat[gt_segms[gt_label]['category_id']].iou += iou + gt_matched.add(gt_label) + pred_matched.add(pred_label) + + # count false positives + crowd_labels_dict = {} + for gt_label, gt_info in gt_segms.items(): + if gt_label in gt_matched: + continue + # crowd segments are ignored + if gt_info['iscrowd'] == 1: + crowd_labels_dict[gt_info['category_id']] = gt_label + continue + pq_stat[gt_info['category_id']].fn += 1 + + # count false positives + for pred_label, pred_info in pred_segms.items(): + if pred_label in pred_matched: + continue + # intersection of the segment with VOID + intersection = gt_pred_map.get((VOID, pred_label), 0) + # plus intersection with corresponding CROWD region if it exists + if pred_info['category_id'] in crowd_labels_dict: + intersection += 
gt_pred_map.get((crowd_labels_dict[pred_info['category_id']], pred_label), 0) + # predicted segment is ignored if more than half of the segment correspond to VOID and CROWD regions + if intersection / pred_info['area'] > 0.5: + continue + pq_stat[pred_info['category_id']].fp += 1 + print('Core: {}, all {} images processed'.format(proc_id, len(annotation_set))) + return pq_stat + + +def pq_compute_multi_core(matched_annotations_list, gt_folder, pred_folder, categories): + cpu_num = multiprocessing.cpu_count() + annotations_split = np.array_split(matched_annotations_list, cpu_num) + print("Number of cores: {}, images per core: {}".format(cpu_num, len(annotations_split[0]))) + workers = multiprocessing.Pool(processes=cpu_num) + processes = [] + for proc_id, annotation_set in enumerate(annotations_split): + p = workers.apply_async(pq_compute_single_core, + (proc_id, annotation_set, gt_folder, pred_folder, categories)) + processes.append(p) + + # https://github.com/open-mmlab/mmdetection/pull/7538 + # Close the process pool, otherwise it will lead to memory + # leaking problems. + workers.close() + workers.join() + + + pq_stat = PQStat() + for p in processes: + pq_stat += p.get() + return pq_stat + + +def pq_compute(gt_json_file, pred_json_file, gt_folder=None, pred_folder=None): + + start_time = time.time() + with open(gt_json_file, 'r') as f: + gt_json = json.load(f) + with open(pred_json_file, 'r') as f: + pred_json = json.load(f) + + if gt_folder is None: + gt_folder = gt_json_file.replace('.json', '') + if pred_folder is None: + pred_folder = pred_json_file.replace('.json', '') + categories = {el['id']: el for el in gt_json['categories']} + + print("Evaluation panoptic segmentation metrics:") + print("Ground truth:") + print("\tSegmentation folder: {}".format(gt_folder)) + print("\tJSON file: {}".format(gt_json_file)) + print("Prediction:") + print("\tSegmentation folder: {}".format(pred_folder)) + print("\tJSON file: {}".format(pred_json_file)) + + if not os.path.isdir(gt_folder): + raise Exception("Folder {} with ground truth segmentations doesn't exist".format(gt_folder)) + if not os.path.isdir(pred_folder): + raise Exception("Folder {} with predicted segmentations doesn't exist".format(pred_folder)) + + pred_annotations = {el['image_id']: el for el in pred_json['annotations']} + matched_annotations_list = [] + for gt_ann in gt_json['annotations']: + image_id = gt_ann['image_id'] + if image_id not in pred_annotations: + raise Exception('no prediction for the image with id: {}'.format(image_id)) + matched_annotations_list.append((gt_ann, pred_annotations[image_id])) + + pq_stat = pq_compute_multi_core(matched_annotations_list, gt_folder, pred_folder, categories) + + metrics = [("All", None), ("Things", True), ("Stuff", False)] + results = {} + for name, isthing in metrics: + results[name], per_class_results = pq_stat.pq_average(categories, isthing=isthing) + if name == 'All': + results['per_class'] = per_class_results + print("{:10s}| {:>5s} {:>5s} {:>5s} {:>5s}".format("", "PQ", "SQ", "RQ", "N")) + print("-" * (10 + 7 * 4)) + + for name, _isthing in metrics: + print("{:10s}| {:5.1f} {:5.1f} {:5.1f} {:5d}".format( + name, + 100 * results[name]['pq'], + 100 * results[name]['sq'], + 100 * results[name]['rq'], + results[name]['n']) + ) + + t_delta = time.time() - start_time + print("Time elapsed: {:0.2f} seconds".format(t_delta)) + + return results + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--gt_json_file', type=str, + help="JSON file with 
ground truth data") + parser.add_argument('--pred_json_file', type=str, + help="JSON file with predictions data") + parser.add_argument('--gt_folder', type=str, default=None, + help="Folder with ground turth COCO format segmentations. \ + Default: X if the corresponding json file is X.json") + parser.add_argument('--pred_folder', type=str, default=None, + help="Folder with prediction COCO format segmentations. \ + Default: X if the corresponding json file is X.json") + args = parser.parse_args() + pq_compute(args.gt_json_file, args.pred_json_file, args.gt_folder, args.pred_folder) \ No newline at end of file diff --git a/mask_adapter/mask_adapter.py b/mask_adapter/mask_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..bfd413c481b60bb7bd66e47a1cb937079c8c14dd --- /dev/null +++ b/mask_adapter/mask_adapter.py @@ -0,0 +1,740 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/maskformer_model.py +""" +from typing import Tuple +import os +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F +from torchvision import transforms as T +from detectron2.config import configurable +from detectron2.data import MetadataCatalog +from detectron2.modeling import META_ARCH_REGISTRY, build_backbone +from detectron2.modeling.backbone import Backbone +from detectron2.modeling.postprocessing import sem_seg_postprocess +from detectron2.structures import Boxes, ImageList, Instances, BitMasks +from detectron2.utils.memory import retry_if_cuda_oom +from .modeling.maft.content_dependent_transfer import ContentDependentTransfer +from .modeling.meta_arch.mask_adapter_head import build_mask_adapter + + + + +VILD_PROMPT = [ + "a photo of a {}.", + "This is a photo of a {}", + "There is a {} in the scene", + "There is the {} in the scene", + "a photo of a {} in the scene", + "a photo of a small {}.", + "a photo of a medium {}.", + "a photo of a large {}.", + "This is a photo of a small {}.", + "This is a photo of a medium {}.", + "This is a photo of a large {}.", + "There is a small {} in the scene.", + "There is a medium {} in the scene.", + "There is a large {} in the scene.", +] + +@META_ARCH_REGISTRY.register() +class MASK_Adapter(nn.Module): + """ + Main class for mask classification semantic segmentation architectures. 
+ """ + + @configurable + def __init__( + self, + *, + backbone: Backbone, + mask_adapter: nn.Module, + weight_dict, + num_queries: int, + object_mask_threshold: float, + overlap_threshold: float, + mask_threshold: float, + train_metadata, + test_metadata, + size_divisibility: int, + sem_seg_postprocess_before_inference: bool, + pixel_mean: Tuple[float], + pixel_std: Tuple[float], + # inference + semantic_on: bool, + panoptic_on: bool, + instance_on: bool, + test_topk_per_image: int, + train_maft : bool, + num_output_maps: int, + ): + """ + Args: + backbone: a backbone module, must follow detectron2's backbone interface + mask_adapter: mask-adapter extract semantic activation maps from masks + weight_dict: dict contains weight for each loss + num_queries: int, number of queries + object_mask_threshold: float, threshold to filter query based on classification score + for panoptic segmentation inference + overlap_threshold: overlap threshold used in general inference for panoptic segmentation + metadata: dataset meta, get `thing` and `stuff` category names for panoptic + segmentation inference + size_divisibility: Some backbones require the input height and width to be divisible by a + specific integer. We can use this to override such requirement. + sem_seg_postprocess_before_inference: whether to resize the prediction back + to original input size before semantic segmentation inference or after. + For high-resolution dataset like Mapillary, resizing predictions before + inference will cause OOM error. + pixel_mean, pixel_std: list or tuple with #channels element, representing + the per-channel mean and std to be used to normalize the input image + semantic_on: bool, whether to output semantic segmentation prediction + instance_on: bool, whether to output instance segmentation prediction + panoptic_on: bool, whether to output panoptic segmentation prediction + test_topk_per_image: int, instance segmentation parameter, keep topk instances per image + """ + super().__init__() + self.backbone = backbone + self.mask_adapter = mask_adapter + self.weight_dict = weight_dict + self.num_queries = num_queries + self.overlap_threshold = overlap_threshold + self.object_mask_threshold = object_mask_threshold + self.mask_threshold = mask_threshold + self.train_metadata = train_metadata + self.test_metadata = test_metadata + if size_divisibility < 0: + # use backbone size_divisibility if not set + size_divisibility = self.backbone.size_divisibility + self.size_divisibility = size_divisibility + self.sem_seg_postprocess_before_inference = sem_seg_postprocess_before_inference + self.register_buffer("pixel_mean", torch.Tensor(pixel_mean).view(-1, 1, 1), False) + self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1), False) + + # additional args + self.semantic_on = semantic_on + self.instance_on = instance_on + self.panoptic_on = panoptic_on + self.test_topk_per_image = test_topk_per_image + + if not self.semantic_on: + assert self.sem_seg_postprocess_before_inference + + self.void_embedding = nn.Embedding(1, backbone.dim_latent) + self.train_dataname = None + self.test_dataname = None + self.train_num_templates = {} + self.train_text_classifier = {} + self.train_maft = train_maft + self.num_output_maps = num_output_maps + + if self.train_maft: + if '_base' in backbone.model_name.lower(): + cdt_params = [640, 8] + elif '_large' in backbone.model_name.lower(): + cdt_params = [768, 8] + self.cdt = ContentDependentTransfer(d_model = cdt_params[0], nhead = cdt_params[1], panoptic_on = 
panoptic_on) + self.freeze_cdt() + + def freeze_cdt(self): + for param in self.cdt.parameters(): + param.requires_grad = False + + #https://github.com/bytedance/fc-clip/blob/2b0bbe213070d44da9182530fa2e826fef03f974/fcclip/fcclip.py#L139 + def prepare_class_names_from_metadata(self, metadata, train_metadata): + def split_labels(x): + res = [] + for x_ in x: + x_ = x_.replace(', ', ',') + x_ = x_.split(',') # there can be multiple synonyms for single class + res.append(x_) + return res + # get text classifier + try: + class_names = split_labels(metadata.stuff_classes) # it includes both thing and stuff + train_class_names = split_labels(train_metadata.stuff_classes) + except: + # this could be for insseg, where only thing_classes are available + class_names = split_labels(metadata.thing_classes) + train_class_names = split_labels(train_metadata.thing_classes) + train_class_names = {l for label in train_class_names for l in label} + category_overlapping_list = [] + for test_class_names in class_names: + is_overlapping = not set(train_class_names).isdisjoint(set(test_class_names)) + category_overlapping_list.append(is_overlapping) + category_overlapping_mask = torch.tensor( + category_overlapping_list, dtype=torch.long) + + def fill_all_templates_ensemble(x_=''): + res = [] + for x in x_: + for template in VILD_PROMPT: + res.append(template.format(x)) + return res, len(res) // len(VILD_PROMPT) + + num_templates = [] + templated_class_names = [] + for x in class_names: + templated_classes, templated_classes_num = fill_all_templates_ensemble(x) + templated_class_names += templated_classes + num_templates.append(templated_classes_num) # how many templates for current classes + class_names = templated_class_names + #print("text for classification:", class_names) + return category_overlapping_mask, num_templates, class_names + + def set_metadata(self, metadata): + self.test_metadata = metadata + self.category_overlapping_mask, self.test_num_templates, self.test_class_names = self.prepare_class_names_from_metadata(metadata, self.train_metadata) + self.test_text_classifier = None + return + + def get_text_classifier(self, dataname): + + if self.training: + os.makedirs("text_embedding", exist_ok=True) + out_path = f"./text_embedding/{dataname}_text_embedding.npy" + if dataname in self.train_text_classifier: + return self.train_text_classifier[dataname], self.train_num_templates[dataname] + + if dataname not in self.train_num_templates: + _, self.train_num_templates[dataname], train_class_names = self.prepare_class_names_from_metadata( + self.train_metadata[dataname], self.train_metadata[dataname] + ) + + if os.path.exists(out_path): + text_classifier = torch.from_numpy(np.load(out_path)).to(self.device) + else: + text_classifier = [] + bs = 128 + + for idx in range(0, len(train_class_names), bs): + text_classifier.append( + self.backbone.get_text_classifier(train_class_names[idx:idx+bs], self.device).detach() + ) + text_classifier = torch.cat(text_classifier, dim=0) + + text_classifier /= text_classifier.norm(dim=-1, keepdim=True) + text_classifier = text_classifier.reshape(text_classifier.shape[0] // len(VILD_PROMPT), len(VILD_PROMPT), text_classifier.shape[-1]).mean(1) + text_classifier /= text_classifier.norm(dim=-1, keepdim=True) + + np.save(out_path, text_classifier.cpu().numpy()) + + self.train_text_classifier[dataname] = text_classifier + return self.train_text_classifier[dataname], self.train_num_templates[dataname] + else: + if self.test_dataname != dataname: + self.category_overlapping_mask, 
self.test_num_templates, self.test_class_names = self.prepare_class_names_from_metadata( + self.test_metadata[dataname], self.test_metadata[dataname] + ) + text_classifier = [] + bs = 128 + for idx in range(0, len(self.test_class_names), bs): + text_classifier.append( + self.backbone.get_text_classifier(self.test_class_names[idx:idx+bs], self.device).detach() + ) + text_classifier = torch.cat(text_classifier, dim=0) + + text_classifier /= text_classifier.norm(dim=-1, keepdim=True) + text_classifier = text_classifier.reshape(text_classifier.shape[0] // len(VILD_PROMPT), len(VILD_PROMPT), text_classifier.shape[-1]).mean(1) + text_classifier /= text_classifier.norm(dim=-1, keepdim=True) + self.test_text_classifier = text_classifier + self.test_dataname = dataname + + return self.test_text_classifier, self.test_num_templates + + @classmethod + def from_config(cls, cfg): + backbone = build_backbone(cfg) + mask_adapter = build_mask_adapter(cfg, cfg.MODEL.MASK_ADAPTER.NAME) + + # loss weights + class_weight = cfg.MODEL.MASK_FORMER.CLASS_WEIGHT + + # building criterion + weight_dict = {"loss_ce": class_weight} + + losses = ["labels"] + + train_metadata = {i: MetadataCatalog.get(i) for i in cfg.DATASETS.TRAIN} + test_metadata = {i: MetadataCatalog.get(i) for i in cfg.DATASETS.TEST} + + return { + "backbone": backbone, + "mask_adapter": mask_adapter, + "weight_dict": weight_dict, + "num_queries": cfg.MODEL.MASK_FORMER.NUM_OBJECT_QUERIES, + "object_mask_threshold": cfg.MODEL.MASK_FORMER.TEST.OBJECT_MASK_THRESHOLD, + "overlap_threshold": cfg.MODEL.MASK_FORMER.TEST.OVERLAP_THRESHOLD, + "mask_threshold": cfg.MODEL.MASK_ADAPTER.MASK_THRESHOLD, + "train_metadata": train_metadata,#MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), + "test_metadata": test_metadata, # MetadataCatalog.get(cfg.DATASETS.TEST[0]), + "size_divisibility": cfg.MODEL.MASK_FORMER.SIZE_DIVISIBILITY, + "sem_seg_postprocess_before_inference": ( + cfg.MODEL.MASK_FORMER.TEST.SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE + or cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON + or cfg.MODEL.MASK_FORMER.TEST.INSTANCE_ON + ), + "pixel_mean": cfg.MODEL.PIXEL_MEAN, + "pixel_std": cfg.MODEL.PIXEL_STD, + # inference + "semantic_on": cfg.MODEL.MASK_FORMER.TEST.SEMANTIC_ON, + "instance_on": cfg.MODEL.MASK_FORMER.TEST.INSTANCE_ON, + "panoptic_on": cfg.MODEL.MASK_FORMER.TEST.PANOPTIC_ON, + "test_topk_per_image": cfg.TEST.DETECTIONS_PER_IMAGE, + "train_maft": cfg.MODEL.MASK_ADAPTER.TRAIN_MAFT, + "num_output_maps": cfg.MODEL.MASK_ADAPTER.NUM_OUTPUT_MAPS + } + + @property + def device(self): + return self.pixel_mean.device + + def forward(self, batched_inputs): + """ + Args: + batched_inputs: a list, batched outputs of :class:`DatasetMapper`. + Each item in the list contains the inputs for one image. + For now, each item in the list is a dict that contains: + * "image": Tensor, image in (C, H, W) format. + * "instances": per-region ground truth + * Other information that's included in the original dicts, such as: + "height", "width" (int): the output resolution of the model (may be different + from input resolution), used in inference. + Returns: + list[dict]: + each dict has the results for one image. The dict contains the following keys: + + * "sem_seg": + A Tensor that represents the + per-pixel segmentation prediced by the head. + The prediction has shape KxHxW that represents the logits of + each class for each pixel. + * "panoptic_seg": + A tuple that represent panoptic output + panoptic_seg (Tensor): of shape (height, width) where the values are ids for each segment. 
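To make the "sem_seg" output described above concrete, here is a toy-shaped sketch (not part of the diff) of how per-mask class scores and masks are combined into a K x H x W map; it mirrors semantic_inference further down in this file:

import torch
import torch.nn.functional as F

Q, K, H, W = 4, 3, 16, 16                        # masks, classes, spatial size (toy values)
mask_cls = torch.randn(Q, K + 1)                 # per-mask logits over K classes + void
mask_pred = torch.rand(Q, H, W)                  # per-mask soft masks

probs = F.softmax(mask_cls, dim=-1)[..., :-1]    # drop the void class
sem_seg = torch.einsum("qc,qhw->chw", probs, mask_pred)   # K x H x W per-pixel class scores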
+ segments_info (list[dict]): Describe each segment in `panoptic_seg`. + Each dict contains keys "id", "category_id", "isthing". + """ + if self.train_maft and self.training : + dataname = "openvocab_coco_2017_train_stuff_sem_seg" + else: + dataname = batched_inputs[0]['dataname'] + if self.training: + dataname_2 = batched_inputs[1]['dataname'] + assert dataname == dataname_2, f"expect batch img from same dataset, but different from {dataname} and {dataname_2}" + + images = [x["image"].to(self.device) for x in batched_inputs] + images = [(x - self.pixel_mean) / self.pixel_std for x in images] + images = ImageList.from_tensors(images, self.size_divisibility) + + features = self.backbone(images.tensor) + + clip_feature = features['clip_vis_dense'] + text_classifier, num_templates = self.get_text_classifier(dataname) + + text_classifier = torch.cat([text_classifier, F.normalize(self.void_embedding.weight, dim=-1)], dim=0) + + clip_vis_dense = self.visual_prediction_forward_convnext_2d(clip_feature) + + if self.train_maft: + #https://github.com/jiaosiyu1999/MAFT-Plus/blob/fd12806df651d309883229de9503e40533f92689/maft/maft_plus.py#L352 + #For maftp,it uses a wrong reshape operation to get clip_vis_dense. Since we don't finetune cdt, we follow them. + img_feat = self.visual_prediction_forward_convnext(clip_feature) + text_classifier = self.cdt(img_feat, text_classifier) + clip_vis_dense = img_feat + else: + clip_vis_dense = self.visual_prediction_forward_convnext_2d(clip_feature) + + if self.training: + # mask classification target + if "instances" in batched_inputs[0]: + gt_instances = [x["instances"].to(self.device) for x in batched_inputs] + targets,masks,labels = self.prepare_targets(gt_instances, images) + else: + targets = None + + semantic_activation_maps = self.mask_adapter(clip_vis_dense, masks) + + maps_for_pooling = F.interpolate(semantic_activation_maps, size=clip_feature.shape[-2:], + mode='bilinear', align_corners=False) + if "convnext" in self.backbone.model_name.lower(): + B, C = clip_feature.size(0),clip_feature.size(1) + N = maps_for_pooling.size(1) + num_instances = N // self.num_output_maps + maps_for_pooling = F.softmax(F.logsigmoid(maps_for_pooling).view(B, N,-1), dim=-1) + pooled_clip_feature = torch.bmm(maps_for_pooling, clip_feature.view(B, C, -1).permute(0, 2, 1)) + pooled_clip_feature = self.backbone.visual_prediction_forward(pooled_clip_feature) + pooled_clip_feature = (pooled_clip_feature.reshape(B,num_instances, self.num_output_maps, -1).mean(dim=-2).contiguous()) + else: + raise NotImplementedError + + mask_cls_results = get_classification_logits(pooled_clip_feature, text_classifier, self.backbone.clip_model.logit_scale, num_templates) + + losses = self.cross_entropy_loss(mask_cls_results, labels) + + for k in list(losses.keys()): + if k in self.weight_dict: + losses[k] *= self.weight_dict[k] + else: + # remove this loss if not specified in `weight_dict` + losses.pop(k) + return losses + else: + masks = [] + classes = [] + for input_per_image in batched_inputs: + height = input_per_image.get("height") + width = input_per_image.get("width") + sem_seg = input_per_image["sem_seg"].to(self.device) + total_masks,class_label = self.sem_seg_2_gt_masks(sem_seg, height, width) + masks.append(total_masks) + classes.append(class_label) + masks = torch.stack(masks) + classes = torch.stack(classes) + + outputs = self.mask_adapter(clip_vis_dense, masks) + + maps_for_pooling = F.interpolate(outputs, size=clip_vis_dense.shape[-2:], + mode='bilinear', align_corners=False) + if 
"convnext" in self.backbone.model_name.lower(): + B,C = clip_feature.size(0),clip_feature.size(1) + N = maps_for_pooling.size(1) + num_instances = N // self.num_output_maps + maps_for_pooling = F.softmax(F.logsigmoid(maps_for_pooling).view(B, N,-1), dim=-1) + pooled_clip_feature = torch.bmm(maps_for_pooling, clip_feature.view(B, C, -1).permute(0, 2, 1)) + pooled_clip_feature = self.backbone.visual_prediction_forward(pooled_clip_feature) + pooled_clip_feature = (pooled_clip_feature.reshape(B,num_instances, self.num_output_maps, -1).mean(dim=-2).contiguous()) + else: + raise NotImplementedError + + mask_cls_results = get_classification_logits(pooled_clip_feature, text_classifier, self.backbone.clip_model.logit_scale, num_templates) + + mask_cls_results = mask_cls_results.softmax(-1) + + #upsample masks + mask_pred_results = F.interpolate( + masks, + size=(images.tensor.shape[-2], images.tensor.shape[-1]), + mode="bilinear", + align_corners=False, + ) + + processed_results = [] + for mask_cls_result, mask_pred_result, input_per_image, image_size in zip( + mask_cls_results, mask_pred_results, batched_inputs, images.image_sizes + ): + + height = input_per_image.get("height", image_size[0]) + width = input_per_image.get("width", image_size[1]) + processed_results.append({}) + + if self.sem_seg_postprocess_before_inference: + mask_pred_result = retry_if_cuda_oom(sem_seg_postprocess)( + mask_pred_result, image_size, height, width + ) + mask_cls_result = mask_cls_result.to(mask_pred_result) + + mask_pred_result = mask_pred_result.squeeze(1) + # semantic segmentation inference + if self.semantic_on: + r = retry_if_cuda_oom(self.semantic_inference)(mask_cls_result, mask_pred_result) + if not self.sem_seg_postprocess_before_inference: + r = retry_if_cuda_oom(sem_seg_postprocess)(r, image_size, height, width) + processed_results[-1]["sem_seg"] = r + + # panoptic segmentation inference + if self.panoptic_on: + panoptic_r = retry_if_cuda_oom(self.panoptic_inference)(mask_cls_result, mask_pred_result) + processed_results[-1]["panoptic_seg"] = panoptic_r + + # instance segmentation inference + if self.instance_on: + instance_r = retry_if_cuda_oom(self.instance_inference)(mask_cls_result, mask_pred_result) + processed_results[-1]["instances"] = instance_r + + return processed_results + + def sem_seg_2_gt_masks(self, sem_seg, height, width): + classes = torch.unique(sem_seg,sorted=False,return_inverse=False,return_counts=False) + gt_labels = classes[classes != 255] + masks = [sem_seg == class_id for class_id in gt_labels] + + if len(masks) == 0: + gt_masks = torch.zeros((0, sem_seg.shape[-2], + sem_seg.shape[-1])).to(sem_seg) + else: + gt_masks = torch.stack(masks).squeeze(1) + + num_masks = gt_masks.shape[0] + total_masks = torch.zeros((num_masks, gt_masks.shape[1], gt_masks.shape[2]), dtype=gt_masks.dtype, device=gt_masks.device) + labels = torch.zeros((num_masks), device=gt_masks.device) + + total_masks[:num_masks] = gt_masks[:num_masks] + labels[:num_masks] = gt_labels[:num_masks] + + return total_masks.float(), labels + + def visual_prediction_forward_convnext(self, x): + batch, channel, h, w = x.shape + + x = x.reshape(batch*h*w, channel).unsqueeze(-1).unsqueeze(-1) # fake 2D input + + x = self.backbone.clip_model.visual.trunk.head(x) + + x = self.backbone.clip_model.visual.head(x) + + return x.reshape(batch, h, w, x.shape[-1]).permute(0,3,1,2) + + def visual_prediction_forward_convnext_2d(self, x): + + clip_vis_dense = self.backbone.clip_model.visual.trunk.head.norm(x) + clip_vis_dense = 
self.backbone.clip_model.visual.trunk.head.drop(clip_vis_dense.permute(0, 2, 3, 1)) + clip_vis_dense = self.backbone.clip_model.visual.head(clip_vis_dense).permute(0, 3, 1, 2) + + return clip_vis_dense + + def cross_entropy_loss(self, mask_cls_results, labels): + + if torch.all(labels == -1): + loss_ce = mask_cls_results.sum() * 0.0 + else: + loss_ce = F.cross_entropy(mask_cls_results.transpose(1, 2), labels.to(torch.int64), ignore_index=-1) #remove celoss weight because of multiple datasets training + + losses = {"loss_ce": loss_ce} + return losses + + def prepare_targets(self, targets, images): + h_pad, w_pad = images.tensor.shape[-2:] + new_targets = [] + masks_list = [] + labels_list = [] + + num_masks = 32 + min_mask_area = 0 + + for targets_per_image in targets: + gt_masks = targets_per_image.gt_masks + if isinstance(gt_masks, BitMasks): + gt_masks = gt_masks.tensor + valid_mask_indices = [i for i, mask in enumerate(gt_masks) if mask.sum() > min_mask_area] + + if len(valid_mask_indices) > 0: + valid_gt_masks = gt_masks[valid_mask_indices] + valid_gt_classes = targets_per_image.gt_classes[valid_mask_indices] + + padded_masks = torch.zeros((valid_gt_masks.shape[0], h_pad, w_pad), dtype=valid_gt_masks.dtype, device=valid_gt_masks.device) + padded_masks[:, : valid_gt_masks.shape[1], : valid_gt_masks.shape[2]] = valid_gt_masks + new_targets.append( + { + "labels": valid_gt_classes, + "masks": padded_masks, + } + ) + + total_masks = torch.zeros((num_masks, h_pad, w_pad), dtype=gt_masks.dtype, device=gt_masks.device) + selected_labels = torch.zeros((num_masks), device=gt_masks.device) + + if valid_gt_masks.shape[0] > num_masks: + selected_indices = torch.randperm(valid_gt_masks.shape[0])[:num_masks] + for idx, mask_idx in enumerate(selected_indices): + total_masks[idx, :valid_gt_masks[mask_idx].shape[0], :valid_gt_masks[mask_idx].shape[1]] = valid_gt_masks[mask_idx] + selected_labels[idx] = valid_gt_classes[mask_idx] + else: + for idx in range(valid_gt_masks.shape[0]): + total_masks[idx, :valid_gt_masks[idx].shape[0], :valid_gt_masks[idx].shape[1]] = valid_gt_masks[idx] + selected_labels[idx] = valid_gt_classes[idx] + + for idx in range(valid_gt_masks.shape[0], num_masks): + total_masks[idx] = torch.zeros((h_pad, w_pad), dtype=gt_masks.dtype, device=gt_masks.device) + selected_labels[idx] = -1 + else: + total_masks = torch.zeros((num_masks, h_pad, w_pad), dtype=gt_masks.dtype, device=gt_masks.device) + selected_labels = torch.zeros((num_masks), device=gt_masks.device) + selected_labels.fill_(-1) + + padded_masks = torch.zeros((0, h_pad, w_pad), dtype=gt_masks.dtype, device=gt_masks.device) + valid_gt_classes = torch.zeros((0), device=gt_masks.device) + new_targets.append( + { + "labels": valid_gt_classes, + "masks": padded_masks, + } + ) + + masks_list.append(total_masks) + labels_list.append(selected_labels) + + masks = torch.stack(masks_list, dim=0) + labels = torch.stack(labels_list, dim=0) + labels = labels.long() + + return new_targets, masks, labels + + def semantic_inference(self, mask_cls, mask_pred): + + mask_cls = F.softmax(mask_cls, dim=-1)[..., :-1] + if mask_pred.dim() == 4: + mask_pred = mask_pred.squeeze(dim=0) + #mask_pred = mask_pred.sigmoid() #remove because of gt masks + semseg = torch.einsum("qc,qhw->chw", mask_cls, mask_pred) + return semseg + + def panoptic_inference(self, mask_cls, mask_pred): + + + scores, labels = F.softmax(mask_cls, dim=-1).max(-1) + num_classes = len(self.test_metadata[self.test_dataname].stuff_classes) + keep = labels.ne(num_classes) & (scores > 
self.object_mask_threshold) + cur_scores = scores[keep] + cur_classes = labels[keep] + cur_masks = mask_pred[keep] + cur_mask_cls = mask_cls[keep] + cur_mask_cls = cur_mask_cls[:, :-1] + + cur_prob_masks = cur_scores.view(-1, 1, 1) * cur_masks + + h, w = cur_masks.shape[-2:] + panoptic_seg = torch.zeros((h, w), dtype=torch.int32, device=cur_masks.device) + segments_info = [] + + current_segment_id = 0 + + if cur_masks.shape[0] == 0: + # We didn't detect any mask :( + return panoptic_seg, segments_info + else: + # take argmax + cur_mask_ids = cur_prob_masks.argmax(0) + stuff_memory_list = {} + for k in range(cur_classes.shape[0]): + pred_class = cur_classes[k].item() + isthing = pred_class in self.test_metadata[self.test_dataname].thing_dataset_id_to_contiguous_id.values() + mask_area = (cur_mask_ids == k).sum().item() + original_area = (cur_masks[k] >= 0.5).sum().item() + mask = (cur_mask_ids == k) & (cur_masks[k] >= 0.5) + + if mask_area > 0 and original_area > 0 and mask.sum().item() > 0: + if mask_area / original_area < self.overlap_threshold: + continue + + # merge stuff regions + if not isthing: + if int(pred_class) in stuff_memory_list.keys(): + panoptic_seg[mask] = stuff_memory_list[int(pred_class)] + continue + else: + stuff_memory_list[int(pred_class)] = current_segment_id + 1 + + current_segment_id += 1 + panoptic_seg[mask] = current_segment_id + + segments_info.append( + { + "id": current_segment_id, + "isthing": bool(isthing), + "category_id": int(pred_class), + } + ) + + return panoptic_seg, segments_info + + def instance_inference(self, mask_cls, mask_pred): + # mask_pred is already processed to have the same shape as original input + + image_size = mask_pred.shape[-2:] + + # [Q, K] + #scores = F.softmax(mask_cls, dim=-1)[:, :-1] #[250,150] + scores = mask_cls[:, :-1].sigmoid() + # if this is panoptic segmentation + if self.panoptic_on: + num_classes = len(self.test_metadata[self.test_dataname].stuff_classes) + else: + num_classes = len(self.test_metadata[self.test_dataname].thing_classes) + labels = torch.arange(num_classes, device=self.device).unsqueeze(0).repeat(self.num_queries, 1).flatten(0, 1) + # scores_per_image, topk_indices = scores.flatten(0, 1).topk(self.num_queries, sorted=False) + scores_per_image, topk_indices = scores.flatten(0, 1).topk(self.test_topk_per_image, sorted=False) + labels_per_image = labels[topk_indices] + + topk_indices = topk_indices // num_classes + # mask_pred = mask_pred.unsqueeze(1).repeat(1, self.sem_seg_head.num_classes, 1).flatten(0, 1) + mask_pred = mask_pred[topk_indices] + + # if this is panoptic segmentation, we only keep the "thing" classes + if self.panoptic_on: + keep = torch.zeros_like(scores_per_image).bool() + for i, lab in enumerate(labels_per_image): + keep[i] = lab in self.test_metadata[self.test_dataname].thing_dataset_id_to_contiguous_id.values() + + scores_per_image = scores_per_image[keep] + labels_per_image = labels_per_image[keep] + mask_pred = mask_pred[keep] + + result = Instances(image_size) + # mask (before sigmoid) + result.pred_masks = (mask_pred > self.mask_threshold).float() + result.pred_boxes = Boxes(torch.zeros(mask_pred.size(0), 4)) + # Uncomment the following to get boxes from masks (this is slow) + # result.pred_boxes = BitMasks(mask_pred > 0).get_bounding_boxes() + + # calculate average mask prob + mask_scores_per_image = (mask_pred.flatten(1) * result.pred_masks.flatten(1)).sum(1) / (result.pred_masks.flatten(1).sum(1) + 1e-6) + result.scores = scores_per_image * mask_scores_per_image + 
result.pred_classes = labels_per_image + return result + +class MaskPooling(nn.Module): + def __init__( + self,mask_threshold + ): + super().__init__() + self.mask_threshold = mask_threshold + + def forward(self, x, mask): + """ + Args: + x: [B, C, H, W] + mask: [B, Q, H, W] + """ + if not x.shape[-2:] == mask.shape[-2:]: + # reshape mask to x + mask = F.interpolate(mask, size=x.shape[-2:], mode='bilinear', align_corners=False) + with torch.no_grad(): + mask = mask.detach() + binary_mask = (mask > self.mask_threshold).to(mask.dtype) + mask = binary_mask * mask + denorm = mask.sum(dim=(-1, -2), keepdim=True) + 1e-8 + + mask_pooled_x = torch.einsum( + "bchw,bqhw->bqc", + x, + mask / denorm, + ) + return mask_pooled_x + +def get_classification_logits(x, text_classifier, logit_scale, num_templates=None): + # x in shape of [B, *, C] + # text_classifier in shape of [num_classes, C] + # logit_scale is a learnable scalar https://github.com/mlfoundations/open_clip/blob/main/src/open_clip/model.py#L201 + # return: [B, *, num_classes] + x = F.normalize(x, dim=-1) + logit_scale = torch.clamp(logit_scale.exp(), max=100) + if len(text_classifier.shape) == 2: + pred_logits = logit_scale * x @ text_classifier.T # B, *, N + 1 + else: + pred_logits = logit_scale * x @ text_classifier.permute(0,2,1) # B, *, N + 1 + # max ensembel as in OpenSeg/ODISE + if pred_logits.shape[2] != 1204 and pred_logits.shape[2] != 366: + final_pred_logits = [] + cur_idx = 0 + for num_t in num_templates: + final_pred_logits.append(pred_logits[:, :, cur_idx: cur_idx + num_t].max(-1).values) + cur_idx += num_t + final_pred_logits.append(pred_logits[:, :, -1]) # the last classifier is for void + final_pred_logits = torch.stack(final_pred_logits, dim=-1) + else: + final_pred_logits = pred_logits + return final_pred_logits \ No newline at end of file diff --git a/mask_adapter/modeling/.DS_Store b/mask_adapter/modeling/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..b81552e861cc18d038dcaee13c31803c6c6b0504 Binary files /dev/null and b/mask_adapter/modeling/.DS_Store differ diff --git a/mask_adapter/modeling/__init__.py b/mask_adapter/modeling/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..08f0fe6acef28ac21adad27c792bf3c74fec12d7 --- /dev/null +++ b/mask_adapter/modeling/__init__.py @@ -0,0 +1,17 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
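A short usage sketch (not part of the diff) showing how MaskPooling and get_classification_logits defined above fit together; all tensors are random stand-ins, and it assumes the mask_adapter package from this diff is importable.

import torch
import torch.nn.functional as F
from mask_adapter.mask_adapter import MaskPooling, get_classification_logits

B, C, H, W, Q, N = 2, 768, 32, 32, 5, 10          # batch, channels, spatial, masks, classes
features = torch.randn(B, C, H, W)                # dense CLIP features (stand-in)
masks = torch.rand(B, Q, H, W)                    # soft masks in [0, 1]
text_classifier = F.normalize(torch.randn(N + 1, C), dim=-1)   # N classes + one void embedding
logit_scale = torch.tensor(4.6052)                # log(100), as in CLIP

pooled = MaskPooling(mask_threshold=0.5)(features, masks)      # B x Q x C mask-averaged features
logits = get_classification_logits(pooled, text_classifier, logit_scale, num_templates=[1] * N)
print(logits.shape)                               # torch.Size([2, 5, 11]) -> N classes + void per mask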
+""" +from .backbone.clip import CLIP +from .meta_arch.mask_adapter_head import MASKAdapterHead diff --git a/mask_adapter/modeling/__pycache__/__init__.cpython-310.pyc b/mask_adapter/modeling/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e7bda800cd30b89d65d0acff682b20a1ddce640 Binary files /dev/null and b/mask_adapter/modeling/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/modeling/__pycache__/__init__.cpython-38.pyc b/mask_adapter/modeling/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a639188d1e53edf84f26ab6db0dfe14cdf0ff73 Binary files /dev/null and b/mask_adapter/modeling/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/modeling/__pycache__/criterion.cpython-310.pyc b/mask_adapter/modeling/__pycache__/criterion.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..764e0422d792ce21d0ce83299f31c957095b3382 Binary files /dev/null and b/mask_adapter/modeling/__pycache__/criterion.cpython-310.pyc differ diff --git a/mask_adapter/modeling/__pycache__/criterion.cpython-38.pyc b/mask_adapter/modeling/__pycache__/criterion.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8cc4e9b7fd15e4781c82744c35fd9d41169c8623 Binary files /dev/null and b/mask_adapter/modeling/__pycache__/criterion.cpython-38.pyc differ diff --git a/mask_adapter/modeling/__pycache__/matcher.cpython-310.pyc b/mask_adapter/modeling/__pycache__/matcher.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4fe1f05ec17b0a960a22156c0d1e5defda7aaa2d Binary files /dev/null and b/mask_adapter/modeling/__pycache__/matcher.cpython-310.pyc differ diff --git a/mask_adapter/modeling/__pycache__/matcher.cpython-38.pyc b/mask_adapter/modeling/__pycache__/matcher.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d5371cd939e2e8d5938c956b4893a6c0695d22bb Binary files /dev/null and b/mask_adapter/modeling/__pycache__/matcher.cpython-38.pyc differ diff --git a/mask_adapter/modeling/backbone/__init__.py b/mask_adapter/modeling/backbone/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be75f0cd9568f901b3174ecfb43c0b9f4fa1f77d --- /dev/null +++ b/mask_adapter/modeling/backbone/__init__.py @@ -0,0 +1,15 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" \ No newline at end of file diff --git a/mask_adapter/modeling/backbone/__pycache__/__init__.cpython-310.pyc b/mask_adapter/modeling/backbone/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e71f344d63bf658f6e998a268ee6b8dff5ebf557 Binary files /dev/null and b/mask_adapter/modeling/backbone/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/modeling/backbone/__pycache__/__init__.cpython-38.pyc b/mask_adapter/modeling/backbone/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e1308e37150648316c3dcb567f8dda356c8c1bcd Binary files /dev/null and b/mask_adapter/modeling/backbone/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/modeling/backbone/__pycache__/clip.cpython-310.pyc b/mask_adapter/modeling/backbone/__pycache__/clip.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab6a2ee965d0b11b9428762a6571db7bf433f63f Binary files /dev/null and b/mask_adapter/modeling/backbone/__pycache__/clip.cpython-310.pyc differ diff --git a/mask_adapter/modeling/backbone/__pycache__/clip.cpython-38.pyc b/mask_adapter/modeling/backbone/__pycache__/clip.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6d9d76abb34dd3073afe391dbe5c2884f34dd40 Binary files /dev/null and b/mask_adapter/modeling/backbone/__pycache__/clip.cpython-38.pyc differ diff --git a/mask_adapter/modeling/backbone/__pycache__/simple_tokenizer.cpython-310.pyc b/mask_adapter/modeling/backbone/__pycache__/simple_tokenizer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..945327f04f86ad629779597e1d969be94c70bf5e Binary files /dev/null and b/mask_adapter/modeling/backbone/__pycache__/simple_tokenizer.cpython-310.pyc differ diff --git a/mask_adapter/modeling/backbone/__pycache__/simple_tokenizer.cpython-38.pyc b/mask_adapter/modeling/backbone/__pycache__/simple_tokenizer.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..44754fd189a3c20c61b86aec0d82f68e0f28e7c4 Binary files /dev/null and b/mask_adapter/modeling/backbone/__pycache__/simple_tokenizer.cpython-38.pyc differ diff --git a/mask_adapter/modeling/backbone/clip.py b/mask_adapter/modeling/backbone/clip.py new file mode 100644 index 0000000000000000000000000000000000000000..6937174e6ffd5dc4772ce79ea64ef164d460c3d7 --- /dev/null +++ b/mask_adapter/modeling/backbone/clip.py @@ -0,0 +1,233 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import torch +import torch.nn.functional as F +import math +from detectron2.utils import comm + +import open_clip + +from detectron2.modeling import BACKBONE_REGISTRY, Backbone, ShapeSpec + +@BACKBONE_REGISTRY.register() +class CLIP(Backbone): + def __init__(self, cfg, input_shape): + super().__init__() + model_name = cfg.MODEL.FC_CLIP.CLIP_MODEL_NAME + pretrained= cfg.MODEL.FC_CLIP.CLIP_PRETRAINED_WEIGHTS + # download on local rank 0 first + if comm.get_local_rank() == 0: + open_clip.create_model_and_transforms(model_name, pretrained=pretrained) + comm.synchronize() + + self.model_name = model_name + self.pretrained = pretrained + + self.clip_model, _, _ = open_clip.create_model_and_transforms(model_name, pretrained=pretrained) + self.text_tokenizer = open_clip.get_tokenizer(model_name) + + model_name = model_name.lower() + if 'convnext_' in model_name: + self.model_type = 'convnext' + if '_base' in model_name: + self.output_channels = [128, 128, 256, 512, 1024] + elif '_large' in model_name: + self.output_channels = [192, 192, 384, 768, 1536] + elif '_xxlarge' in model_name: + self.output_channels = [384, 384, 768, 1536, 3072] + + elif 'rn' in model_name: + self.model_type = 'resnet' + if model_name.replace('-quickgelu', '') in ['rn50', 'rn101']: + self.output_channels = [64, 256, 512, 1024, 2048] + elif model_name == 'rn50x4': + self.output_channels = [80, 320, 640, 1280, 2560] + elif model_name == 'rn50x16': + self.output_channels = [96, 384, 768, 1536, 3072] + elif model_name == 'rn50x64': + self.output_channels = [128, 512, 1024, 2048, 4096] + + self._out_feature_strides = { + "stem": 2, + "res2": 4, + "res3": 8, + "res4": 16, + "res5": 32, + "clip_embedding": -1 + } + self._out_feature_channels = { + "stem": self.output_channels[0], + "res2": self.output_channels[1], + "res3": self.output_channels[2], + "res4": self.output_channels[3], + "res5": self.output_channels[4], + "clip_embedding": self.dim_latent + } + + self.eval() + self.freeze_everything() + + def freeze_everything(self): + for param in self.clip_model.parameters(): + param.requires_grad = False + + def encode_text(self, text, normalize: bool = False): + cast_dtype = self.clip_model.transformer.get_cast_dtype() + + x = self.clip_model.token_embedding(text).to(cast_dtype) # [batch_size, n_ctx, d_model] + + x = x + self.clip_model.positional_embedding.to(cast_dtype) + x = x.permute(1, 0, 2) # NLD -> LND + x = self.clip_model.transformer(x, attn_mask=self.clip_model.attn_mask) + x = x.permute(1, 0, 2) # LND -> NLD + x = self.clip_model.ln_final(x) # [batch_size, n_ctx, transformer.width] + # take features from the eot embedding (eot_token is the highest number in each sequence) + x = x[torch.arange(x.shape[0]), text.argmax(dim=-1)] @ self.clip_model.text_projection + return F.normalize(x, dim=-1) if normalize else x + + def tokenize_text(self, text): + return self.text_tokenizer(text) + + def extract_features(self, x): + return { + 'convnext': self.extract_features_convnext, + 'resnet': self.extract_features_resnet, + }[self.model_type](x) + + def visual_prediction_forward(self, x, masks=None): + return { + 'convnext': self.visual_prediction_forward_convnext, + 'resnet': self.visual_prediction_forward_resnet, + }[self.model_type](x, masks) + + def extract_features_convnext(self, x): + out = {} + x = self.clip_model.visual.trunk.stem(x) + out['stem'] = x.contiguous() # os4 + for i in range(4): + x = self.clip_model.visual.trunk.stages[i](x) + out[f'res{i+2}'] = x.contiguous() # res 2 (os4), 3 (os8), 4 (os16), 5 
(os32)
+
+        x = self.clip_model.visual.trunk.norm_pre(x)
+        out['clip_vis_dense'] = x.contiguous()
+        return out
+
+    def extract_features_resnet(self, x):
+        out = {}
+        x = self.clip_model.visual.act1(self.clip_model.visual.bn1(self.clip_model.visual.conv1(x)))
+        x = self.clip_model.visual.act2(self.clip_model.visual.bn2(self.clip_model.visual.conv2(x)))
+        x = self.clip_model.visual.act3(self.clip_model.visual.bn3(self.clip_model.visual.conv3(x)))
+        out['stem'] = x.contiguous() # os2
+        x = self.clip_model.visual.avgpool(x)
+        x = self.clip_model.visual.layer1(x)
+        out['res2'] = x.contiguous() # os4
+        x = self.clip_model.visual.layer2(x)
+        out['res3'] = x.contiguous() # os8
+        x = self.clip_model.visual.layer3(x)
+        out['res4'] = x.contiguous() # os16
+        x = self.clip_model.visual.layer4(x)
+        out['res5'] = x.contiguous() # os32
+        out['clip_vis_dense'] = x
+        return out
+
+    def visual_prediction_forward_convnext(self, x, masks):
+        batch, num_query, channel = x.shape
+        x = x.reshape(batch*num_query, channel, 1, 1) # fake 2D input
+        x = self.clip_model.visual.trunk.head(x)
+        x = self.clip_model.visual.head(x)
+        return x.view(batch, num_query, x.shape[-1]) # B x num_queries x 640
+
+    def visual_prediction_forward_resnet(self, x, masks):
+        batch, channel, height, width = x.shape
+        if masks.shape[-2] != height or masks.shape[-1] != width:
+            masks = F.interpolate(masks, size=(height, width), mode='bilinear', align_corners=False)
+        num_masks = masks.shape[1]
+
+        positional_embedding = self.clip_model.visual.attnpool.positional_embedding.to(x.dtype)
+        spatial_pos_embed = positional_embedding[1:, None, :]  # HW x 1 x C
+        orig_size = int(math.sqrt(spatial_pos_embed.shape[0]))
+        spatial_pos_embed = spatial_pos_embed.permute(1, 2, 0).reshape(1, channel, orig_size, orig_size)
+        spatial_pos_embed = F.interpolate(spatial_pos_embed, size=(height, width), mode='bilinear', align_corners=False) # 1 x C x H x W
+        spatial_pos_embed = spatial_pos_embed.permute(2, 3, 0, 1).reshape(height*width, 1, channel)
+        x = x.reshape(batch, channel, height * width).permute(2, 0, 1)  # BCHW -> (HW)BC
+        key_value = x + spatial_pos_embed
+
+        masks = masks.reshape(batch, num_masks, height * width)
+        masks = (masks > 0).to(masks.dtype)
+        query = x.mean(0, keepdim=True) + positional_embedding[:1, None, :]
+        query = query.repeat_interleave(num_masks, dim=0)
+
+        attn_mask = masks < 0.5
+        attn_mask = attn_mask.unsqueeze(1).expand(-1, self.clip_model.visual.attnpool.num_heads, -1, -1)
+        attn_mask = attn_mask.reshape(batch * self.clip_model.visual.attnpool.num_heads,
+                                      query.shape[0], key_value.shape[0])
+
+        x = F.multi_head_attention_forward(
+            query=query, key=key_value, value=key_value,
+            embed_dim_to_check=key_value.shape[-1],
+            num_heads=self.clip_model.visual.attnpool.num_heads,
+            q_proj_weight=self.clip_model.visual.attnpool.q_proj.weight,
+            k_proj_weight=self.clip_model.visual.attnpool.k_proj.weight,
+            v_proj_weight=self.clip_model.visual.attnpool.v_proj.weight,
+            in_proj_weight=None,
+            in_proj_bias=torch.cat([self.clip_model.visual.attnpool.q_proj.bias,
+                                    self.clip_model.visual.attnpool.k_proj.bias,
+                                    self.clip_model.visual.attnpool.v_proj.bias]),
+            bias_k=None,
+            bias_v=None,
+            add_zero_attn=False,
+            dropout_p=0.,
+            out_proj_weight=self.clip_model.visual.attnpool.c_proj.weight,
+            out_proj_bias=self.clip_model.visual.attnpool.c_proj.bias,
+            use_separate_proj_weight=True,
+            training=self.clip_model.visual.attnpool.training,
+            need_weights=False,
+            attn_mask=attn_mask
+        )[0].permute(1, 0, 2)  # B x N x C
+
+        return x
+
+    def get_text_classifier(self, text_list, device):
+        self.eval()
+        with torch.no_grad():
+            # reference for templates: https://github.com/mlfoundations/open_clip/blob/91f6cce16b7bee90b3b5d38ca305b5b3b67cc200/src/training/imagenet_zeroshot_data.py
+            text_tokens = self.tokenize_text(text_list)
+            text_tokens = text_tokens.to(device)
+            # we return the un-normalized text features.
+            text_features = self.encode_text(text_tokens, normalize=False)
+            return text_features
+
+    def forward(self, x):
+        self.eval()
+        with torch.no_grad():
+            return self.extract_features(x)
+
+    @property
+    def dim_latent(self):
+        return self.clip_model.text_projection.shape[-1]
+
+    def output_shape(self):
+        return {
+            name: ShapeSpec(
+                channels=self._out_feature_channels[name], stride=self._out_feature_strides[name]
+            )
+            for name in ["stem", "res2", "res3", "res4", "res5", "clip_embedding"]
+        }
+
+    @property
+    def size_divisibility(self):
+        return -1
\ No newline at end of file
diff --git a/mask_adapter/modeling/maft/__pycache__/content_dependent_transfer.cpython-310.pyc b/mask_adapter/modeling/maft/__pycache__/content_dependent_transfer.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..309e0ba9495b9b44e4c4bc460d608dbbc4a71e25
Binary files /dev/null and b/mask_adapter/modeling/maft/__pycache__/content_dependent_transfer.cpython-310.pyc differ
diff --git a/mask_adapter/modeling/maft/__pycache__/content_dependent_transfer.cpython-38.pyc b/mask_adapter/modeling/maft/__pycache__/content_dependent_transfer.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ae8987e34f075453fbdd626aefa35885a9060b42
Binary files /dev/null and b/mask_adapter/modeling/maft/__pycache__/content_dependent_transfer.cpython-38.pyc differ
diff --git a/mask_adapter/modeling/maft/content_dependent_transfer.py b/mask_adapter/modeling/maft/content_dependent_transfer.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1d6d78b531349b3481bc891de9a94c84f0f0505
--- /dev/null
+++ b/mask_adapter/modeling/maft/content_dependent_transfer.py
@@ -0,0 +1,134 @@
+import math
+import torch
+from torch import nn, Tensor
+from torch.nn import functional as F
+from typing import Optional
+
+
+class ShortCut_CrossAttention(nn.Module):
+
+    def __init__(self, d_model, nhead, panoptic_on=False):
+        super().__init__()
+        self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=0.0)
+        self.norm = nn.LayerNorm(d_model)
+        self.activation = F.relu
+
+        self._reset_parameters()
+
+        self.MLP = nn.Linear(d_model, d_model)
+        self.panoptic_on = panoptic_on
+        if panoptic_on:
+            nn.init.constant_(self.MLP.weight, 0.0)
+            nn.init.constant_(self.MLP.bias, 0.0)
+
+    def _reset_parameters(self):
+        for p in self.parameters():
+            if p.dim() > 1:
+                nn.init.xavier_uniform_(p)
+
+    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
+        return tensor if pos is None else tensor + pos
+
+    def forward(self, tgt, memory,
+                memory_mask: Optional[Tensor] = None,
+                memory_key_padding_mask: Optional[Tensor] = None,
+                pos: Optional[Tensor] = None,
+                query_pos: Optional[Tensor] = None):
+        tgt2 = self.multihead_attn(query=self.with_pos_embed(tgt, query_pos),
+                                   key=self.with_pos_embed(memory, pos),
+                                   value=memory, attn_mask=memory_mask,
+                                   key_padding_mask=memory_key_padding_mask)[0]
+
+        if self.panoptic_on:
+            tgt = tgt + self.norm(self.MLP(tgt2))
+        else:
+            tgt = self.norm(tgt + self.MLP(tgt2))
+
+        return tgt
+
+
+class ContentDependentTransfer(nn.Module):
+
+    def __init__(self, d_model, nhead, panoptic_on):
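+        # ContentDependentTransfer (from MAFT) cross-attends the text-classifier embeddings (queries)
+        # against the image features (keys/values) to produce a per-image bias that adapts the
+        # classifier to the current image; see forward() below.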
super().__init__() + self.pe_layer = PositionEmbeddingSine(d_model//2, normalize=True) + self.cross_atten = ShortCut_CrossAttention(d_model = d_model, nhead = nhead, panoptic_on = panoptic_on) + + def visual_prediction_forward_convnext(self, x): + batch, channel, h, w = x.shape + x = x.reshape(batch*h*w, channel).unsqueeze(-1).unsqueeze(-1) # fake 2D input + x = self.truck_head(x) + x = self.head(x) + return x.reshape(batch, h, w, x.shape[-1]).permute(0,3,1,2) # B x num_queries x 640 + + + def forward(self, img_feat, text_classifier, ): + text_classifier = text_classifier.unsqueeze(0).repeat(img_feat.shape[0],1,1) + + pos = self.pe_layer(img_feat, None).flatten(2).permute(2, 0, 1) # hw * b * c + img_feat = img_feat.flatten(2).permute(2, 0, 1) # hw * b * c + + bias = self.cross_atten(text_classifier.permute(1, 0, 2), img_feat, memory_mask=None, memory_key_padding_mask=None, pos=pos, query_pos=None) + + return bias.permute(1, 0, 2) + +class PositionEmbeddingSine(nn.Module): + """ + This is a more standard version of the position embedding, very similar to the one + used by the Attention is all you need paper, generalized to work on images. + """ + + def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): + super().__init__() + self.num_pos_feats = num_pos_feats + self.temperature = temperature + self.normalize = normalize + if scale is not None and normalize is False: + raise ValueError("normalize should be True if scale is passed") + if scale is None: + scale = 2 * math.pi + self.scale = scale + + def forward(self, x, mask=None): + if mask is None: + mask = torch.zeros((x.size(0), x.size(2), x.size(3)), device=x.device, dtype=torch.bool) + not_mask = ~mask + y_embed = not_mask.cumsum(1, dtype=torch.float32) + x_embed = not_mask.cumsum(2, dtype=torch.float32) + if self.normalize: + eps = 1e-6 + y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale + x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale + + dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) + dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) + + pos_x = x_embed[:, :, :, None] / dim_t + pos_y = y_embed[:, :, :, None] / dim_t + pos_x = torch.stack( + (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 + ).flatten(3) + pos_y = torch.stack( + (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 + ).flatten(3) + pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) + return pos + + def __repr__(self, _repr_indent=4): + head = "Positional encoding " + self.__class__.__name__ + body = [ + "num_pos_feats: {}".format(self.num_pos_feats), + "temperature: {}".format(self.temperature), + "normalize: {}".format(self.normalize), + "scale: {}".format(self.scale), + ] + # _repr_indent = 4 + lines = [head] + [" " * _repr_indent + line for line in body] + return "\n".join(lines) diff --git a/mask_adapter/modeling/meta_arch/__init__.py b/mask_adapter/modeling/meta_arch/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be75f0cd9568f901b3174ecfb43c0b9f4fa1f77d --- /dev/null +++ b/mask_adapter/modeling/meta_arch/__init__.py @@ -0,0 +1,15 @@ +""" +Copyright (2023) Bytedance Ltd. and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" \ No newline at end of file diff --git a/mask_adapter/modeling/meta_arch/__pycache__/__init__.cpython-310.pyc b/mask_adapter/modeling/meta_arch/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b1c1e2958458670202769a05bedbe09eeab51d5f Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/__init__.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8ea0ba67306d42c3429d096c0e9e5c55c1b4cc89 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/convnext.cpython-310.pyc b/mask_adapter/modeling/meta_arch/__pycache__/convnext.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0bfee6e15c8090944ece489a9d0485a7d7e3b9a8 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/convnext.cpython-310.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/convnext.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/convnext.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d9f4c488b1b17b2b0c4a37c7428e1d05c4c36e12 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/convnext.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_head.cpython-310.pyc b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_head.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..21f45f98520dfffea76e747a652584dfeb95d8a2 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_head.cpython-310.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_head.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_head.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24b1b01b67b67a7b0b156637e44a83993f698609 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_head.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter.cpython-310.pyc b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e28266b30d8b7b2e9e3ee5ae2fd334df8a83dfa Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter.cpython-310.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..916e0ec19ebe2fd87f43711bd5071f837b371b27 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter_first.cpython-310.pyc 
b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter_first.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d52e3c03f74cecb6a6f2b2afbd3bd71fc36be102 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter_first.cpython-310.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter_first.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter_first.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a9a5f435547cb341ed6148d0547866eb7764b725 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_mask_adapter_first.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/fcclip_sparse_head.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_sparse_head.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2867e48e495ffbc86612caded57a6938f7a1f749 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/fcclip_sparse_head.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/mask_adapter_head.cpython-310.pyc b/mask_adapter/modeling/meta_arch/__pycache__/mask_adapter_head.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..491016fe5354345828ebd2b29fa7e6f9663476c6 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/mask_adapter_head.cpython-310.pyc differ diff --git a/mask_adapter/modeling/meta_arch/__pycache__/mask_adapter_head.cpython-38.pyc b/mask_adapter/modeling/meta_arch/__pycache__/mask_adapter_head.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..df135cbecc87364de21d32a6bdc9fe81a78c31b9 Binary files /dev/null and b/mask_adapter/modeling/meta_arch/__pycache__/mask_adapter_head.cpython-38.pyc differ diff --git a/mask_adapter/modeling/meta_arch/convnext.py b/mask_adapter/modeling/meta_arch/convnext.py new file mode 100644 index 0000000000000000000000000000000000000000..b7e6153ca9640dd8f455a54093fe07430db1cb01 --- /dev/null +++ b/mask_adapter/modeling/meta_arch/convnext.py @@ -0,0 +1,116 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +from timm.models.layers import trunc_normal_, DropPath + + +class ConvNextV2Block(nn.Module): + """ ConvNeXtV2 Block. + + Args: + dim (int): Number of input channels. + drop_path (float): Stochastic depth rate. Default: 0.0 + """ + + def __init__(self, dim, drop_path=0.): + super().__init__() + self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers + self.act = nn.GELU() + self.grn = GRN(4 * dim) + self.pwconv2 = nn.Linear(4 * dim, dim) + self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() + + def forward(self, x): + input = x + x = self.dwconv(x) + x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.grn(x) + x = self.pwconv2(x) + x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) + + x = input + self.drop_path(x) + return x + +class GRN(nn.Module): + """ GRN (Global Response Normalization) layer + """ + def __init__(self, dim): + super().__init__() + self.gamma = nn.Parameter(torch.zeros(1, 1, 1, dim)) + self.beta = nn.Parameter(torch.zeros(1, 1, 1, dim)) + + def forward(self, x): + Gx = torch.norm(x, p=2, dim=(1,2), keepdim=True) + Nx = Gx / (Gx.mean(dim=-1, keepdim=True) + 1e-6) + return self.gamma * (x * Nx) + self.beta + x + +class ConvNextBlock(nn.Module): + r""" ConvNeXt Block. There are two equivalent implementations: + (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W) + (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back + We use (2) as we find it slightly faster in PyTorch + + Args: + dim (int): Number of input channels. + drop_path (float): Stochastic depth rate. Default: 0.0 + layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6. + """ + + def __init__(self, dim, kernel_size=7, drop_path=0., layer_scale_init_value=1e-6): + super().__init__() + self.dwconv = nn.Conv2d(dim, dim, kernel_size=kernel_size, padding=kernel_size//2, groups=dim) # depthwise conv + self.norm = LayerNorm(dim, eps=1e-6) + self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers + self.act = nn.GELU() + self.pwconv2 = nn.Linear(4 * dim, dim) + self.gamma = nn.Parameter(layer_scale_init_value * torch.ones((dim)), + requires_grad=True) if layer_scale_init_value > 0 else None + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + + def forward(self, x): + input = x + x = self.dwconv(x) + x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C) + x = self.norm(x) + x = self.pwconv1(x) + x = self.act(x) + x = self.pwconv2(x) + if self.gamma is not None: + x = self.gamma * x + x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W) + + x = input + self.drop_path(x) + return x + + +class LayerNorm(nn.Module): + r""" LayerNorm that supports two data formats: channels_last (default) or channels_first. + The ordering of the dimensions in the inputs. channels_last corresponds to inputs with + shape (batch_size, height, width, channels) while channels_first corresponds to inputs + with shape (batch_size, channels, height, width). 
+ """ + + def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"): + super().__init__() + self.weight = nn.Parameter(torch.ones(normalized_shape)) + self.bias = nn.Parameter(torch.zeros(normalized_shape)) + self.eps = eps + self.data_format = data_format + if self.data_format not in ["channels_last", "channels_first"]: + raise NotImplementedError + self.normalized_shape = (normalized_shape,) + + def forward(self, x): + if self.data_format == "channels_last": + return F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps) + elif self.data_format == "channels_first": + u = x.mean(1, keepdim=True) + s = (x - u).pow(2).mean(1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.eps) + x = self.weight[:, None, None] * x + self.bias[:, None, None] + return x \ No newline at end of file diff --git a/mask_adapter/modeling/meta_arch/mask_adapter_head.py b/mask_adapter/modeling/meta_arch/mask_adapter_head.py new file mode 100644 index 0000000000000000000000000000000000000000..716db2b5b8ae2bc4be02b384cc2ab5d574c6d0a2 --- /dev/null +++ b/mask_adapter/modeling/meta_arch/mask_adapter_head.py @@ -0,0 +1,137 @@ +import logging +from copy import deepcopy +from typing import Callable, Dict, List, Optional, Tuple, Union + +import fvcore.nn.weight_init as weight_init +from torch import nn +from torch.nn import functional as F +import torch +from detectron2.config import configurable +from detectron2.layers import Conv2d, ShapeSpec, get_norm +from detectron2.modeling import SEM_SEG_HEADS_REGISTRY +import torch.utils.checkpoint as cp +from .convnext import ConvNextBlock +from einops import rearrange,repeat + +@SEM_SEG_HEADS_REGISTRY.register() +class MASKAdapterHead(nn.Module): + + @configurable + def __init__( + self, + clip_model_name, + mask_in_chans: int, + num_channels: int, + use_checkpoint: bool, + num_output_maps: int, + ): + """ + NOTE: this interface is experimental. + Args: + input_shape: shapes (channels and stride) of the input features + num_classes: number of classes to predict + pixel_decoder: the pixel decoder module + loss_weight: loss weight + ignore_value: category id to be ignored during training. 
+ transformer_predictor: the transformer decoder that makes prediction + transformer_in_feature: input feature name to the transformer_predictor + """ + super().__init__() + self.use_checkpoint = use_checkpoint + + if '_base' in clip_model_name: + clip_dim = 640 + elif '_large' in clip_model_name: + clip_dim = 768 + + self.fuse = nn.Conv2d(clip_dim, num_channels, 1) + + self.cnext1 = ConvNextBlock(num_channels) + + self.cnext2 = ConvNextBlock(num_channels) + + self.cnext3 = ConvNextBlock(num_channels) + + self.norm = nn.LayerNorm(num_channels) + self.final = nn.Conv2d(num_channels, num_output_maps, 1) + + self.mask_downscaling = nn.Sequential( + nn.Conv2d(1, mask_in_chans // 4, kernel_size=3, stride=2, padding=1), + LayerNorm2d(mask_in_chans // 4), + nn.GELU(), + nn.Conv2d(mask_in_chans // 4, mask_in_chans, kernel_size=3, stride=2, padding=1), + LayerNorm2d(mask_in_chans), + nn.GELU(), + nn.Conv2d(mask_in_chans, clip_dim, kernel_size=1), + ) + + + @classmethod + def from_config(cls, cfg): + + return { + "clip_model_name": cfg.MODEL.FC_CLIP.CLIP_MODEL_NAME, + "mask_in_chans": cfg.MODEL.MASK_ADAPTER.MASK_IN_CHANNELS, + "num_channels": cfg.MODEL.MASK_ADAPTER.NUM_CHANNELS, + "use_checkpoint": cfg.MODEL.MASK_ADAPTER.USE_CHECKPOINT, + "num_output_maps": cfg.MODEL.MASK_ADAPTER.NUM_OUTPUT_MAPS, + } + + def forward(self, clip_feature, masks): + + + N = masks.size(1) + masks = rearrange(masks, 'B N H W -> (B N) H W').unsqueeze(dim=1) + + clip_feature = repeat(clip_feature, "B C H W -> (B N) C H W", N=N) + + H,W = clip_feature.shape[-2:] + masks = F.interpolate(masks.float(), size=(H*4,W*4), + mode='bilinear', align_corners=False) + masks = self.mask_downscaling(masks) + + outputs = clip_feature + masks + + def _inner_forward(outputs): + outputs = self.fuse(outputs) + + outputs = self.cnext1(outputs) + + outputs = self.cnext2(outputs) + + outputs = self.cnext3(outputs) + + outputs = outputs.permute(0, 2, 3, 1) + outputs = self.norm(outputs.contiguous()) + outputs = outputs.permute(0, 3, 1, 2) + + outputs = self.final(outputs.contiguous()) + + outputs = rearrange(outputs, '(B N) C H W -> B (N C) H W',N=N) + + return outputs + + if self.use_checkpoint and self.training: + outputs = cp.checkpoint(_inner_forward, outputs,use_reentrant=False) + else: + outputs = _inner_forward(outputs) + return outputs + +def build_mask_adapter(cfg,name): + return SEM_SEG_HEADS_REGISTRY.get(name)(cfg) + +# From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa +# Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa +class LayerNorm2d(nn.Module): + def __init__(self, num_channels: int, eps: float = 1e-6) -> None: + super().__init__() + self.weight = nn.Parameter(torch.ones(num_channels)) + self.bias = nn.Parameter(torch.zeros(num_channels)) + self.eps = eps + + def forward(self, x: torch.Tensor) -> torch.Tensor: + u = x.mean(1, keepdim=True) + s = (x - u).pow(2).mean(1, keepdim=True) + x = (x - u) / torch.sqrt(s + self.eps) + x = self.weight[:, None, None] * x + self.bias[:, None, None] + return x \ No newline at end of file diff --git a/mask_adapter/sam_maskadapter.py b/mask_adapter/sam_maskadapter.py new file mode 100644 index 0000000000000000000000000000000000000000..c5c03686a692402bf22823b6e5c23a2c550abef8 --- /dev/null +++ b/mask_adapter/sam_maskadapter.py @@ -0,0 +1,362 @@ +import numpy as np +import torch +from torch.nn import functional as F +import cv2 + +from detectron2.data import 
MetadataCatalog +from detectron2.structures import BitMasks +from detectron2.utils.visualizer import ColorMode, Visualizer + +import open_clip +from sam2.build_sam import build_sam2 +from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator +from .modeling.meta_arch.mask_adapter_head import build_mask_adapter +from sam2.sam2_image_predictor import SAM2ImagePredictor + + +from PIL import Image + +PIXEL_MEAN = [122.7709383, 116.7460125, 104.09373615] +PIXEL_STD = [68.5005327, 66.6321579, 70.32316305] + +class OpenVocabVisualizer(Visualizer): + def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE, class_names=None): + super().__init__(img_rgb, metadata, scale, instance_mode) + self.class_names = class_names + + def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.6): + """ + Draw semantic segmentation predictions/labels. + Args: + sem_seg (Tensor or ndarray): the segmentation of shape (H, W). + Each value is the integer label of the pixel. + area_threshold (int): segments with less than `area_threshold` are not drawn. + alpha (float): the larger it is, the more opaque the segmentations are. + Returns: + output (VisImage): image object with visualizations. + """ + if isinstance(sem_seg, torch.Tensor): + sem_seg = sem_seg.numpy() + labels, areas = np.unique(sem_seg, return_counts=True) + sorted_idxs = np.argsort(-areas).tolist() + labels = labels[sorted_idxs] + class_names = self.class_names if self.class_names is not None else self.metadata.stuff_classes + + for label in filter(lambda l: l < len(class_names), labels): + try: + mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] + except (AttributeError, IndexError): + mask_color = None + + binary_mask = (sem_seg == label).astype(np.uint8) + text = class_names[label] + self.draw_binary_mask( + binary_mask, + color=mask_color, + edge_color=(1.0, 1.0, 240.0 / 255), + text=text, + alpha=alpha, + area_threshold=area_threshold, + ) + return self.output + + +class SAMVisualizationDemo(object): + def __init__(self, cfg, granularity, sam2, clip_model ,mask_adapter, instance_mode=ColorMode.IMAGE, parallel=False,): + self.metadata = MetadataCatalog.get( + cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" + ) + + self.cpu_device = torch.device("cpu") + self.instance_mode = instance_mode + + self.parallel = parallel + self.granularity = granularity + + self.sam2 = sam2 + self.predictor = SAM2AutomaticMaskGenerator(sam2, points_per_batch=16, + pred_iou_thresh=0.8, + stability_score_thresh=0.7, + crop_n_layers=0, + crop_n_points_downscale_factor=2, + min_mask_region_area=100) + + self.clip_model = clip_model + self.mask_adapter = mask_adapter + + + + def extract_features_convnext(self, x): + out = {} + x = self.clip_model.visual.trunk.stem(x) + out['stem'] = x.contiguous() # os4 + for i in range(4): + x = self.clip_model.visual.trunk.stages[i](x) + out[f'res{i+2}'] = x.contiguous() # res 2 (os4), 3 (os8), 4 (os16), 5 (os32) + + x = self.clip_model.visual.trunk.norm_pre(x) + out['clip_vis_dense'] = x.contiguous() + return out + + def visual_prediction_forward_convnext(self, x): + batch, num_query, channel = x.shape + x = x.reshape(batch*num_query, channel, 1, 1) # fake 2D input + x = self.clip_model.visual.trunk.head(x) + x = self.clip_model.visual.head(x) + return x.view(batch, num_query, x.shape[-1]) # B x num_queries x 640 + + def visual_prediction_forward_convnext_2d(self, x): + + clip_vis_dense = self.clip_model.visual.trunk.head.norm(x) + clip_vis_dense = 
self.clip_model.visual.trunk.head.drop(clip_vis_dense.permute(0, 2, 3, 1)) + clip_vis_dense = self.clip_model.visual.head(clip_vis_dense).permute(0, 3, 1, 2) + + return clip_vis_dense + + def run_on_image(self, ori_image, class_names): + height, width, _ = ori_image.shape + if width > height: + new_width = 896 + new_height = int((new_width / width) * height) + else: + new_height = 896 + new_width = int((new_height / height) * width) + image = cv2.resize(ori_image, (new_width, new_height)) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + ori_image = cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB) + visualizer = OpenVocabVisualizer(ori_image, self.metadata, instance_mode=self.instance_mode, class_names=class_names) + with torch.no_grad():#, torch.cuda.amp.autocast(): + masks = self.predictor.generate(image) + pred_masks = [masks[i]['segmentation'][None,:,:] for i in range(len(masks))] + pred_masks = np.row_stack(pred_masks) + pred_masks = BitMasks(pred_masks) + + image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) + + pixel_mean = torch.tensor(PIXEL_MEAN).view(-1, 1, 1) + pixel_std = torch.tensor(PIXEL_STD).view(-1, 1, 1) + + image = (image - pixel_mean) / pixel_std + + image = image.unsqueeze(0) + + if len(class_names) == 1: + class_names.append('others') + txts = [f'a photo of {cls_name}' for cls_name in class_names] + text = open_clip.tokenize(txts) + + + with torch.no_grad(): + self.clip_model.cuda() + text_features = self.clip_model.encode_text(text.cuda()) + text_features /= text_features.norm(dim=-1, keepdim=True) + + features = self.extract_features_convnext(image.cuda().float()) + + clip_feature = features['clip_vis_dense'] + + clip_vis_dense = self.visual_prediction_forward_convnext_2d(clip_feature) + + semantic_activation_maps = self.mask_adapter(clip_vis_dense, pred_masks.tensor.unsqueeze(0).float().cuda()) + + maps_for_pooling = F.interpolate(semantic_activation_maps, size=clip_feature.shape[-2:], + mode='bilinear', align_corners=False) + + B, C = clip_feature.size(0),clip_feature.size(1) + N = maps_for_pooling.size(1) + num_instances = N // 16 + maps_for_pooling = F.softmax(F.logsigmoid(maps_for_pooling).view(B, N,-1), dim=-1) + pooled_clip_feature = torch.bmm(maps_for_pooling, clip_feature.view(B, C, -1).permute(0, 2, 1)) + pooled_clip_feature = self.visual_prediction_forward_convnext(pooled_clip_feature) + pooled_clip_feature = (pooled_clip_feature.reshape(B,num_instances, 16, -1).mean(dim=-2).contiguous()) + + class_preds = (100.0 * pooled_clip_feature @ text_features.T).softmax(dim=-1) + class_preds = class_preds.squeeze(0) + select_cls = torch.zeros_like(class_preds) + + max_scores, select_mask = torch.max(class_preds, dim=0) + if len(class_names) == 2 and class_names[-1] == 'others': + select_mask = select_mask[:-1] + if self.granularity < 1: + thr_scores = max_scores * self.granularity + select_mask = [] + if len(class_names) == 2 and class_names[-1] == 'others': + thr_scores = thr_scores[:-1] + for i, thr in enumerate(thr_scores): + cls_pred = class_preds[:,i] + locs = torch.where(cls_pred > thr) + select_mask.extend(locs[0].tolist()) + for idx in select_mask: + select_cls[idx] = class_preds[idx] + semseg = torch.einsum("qc,qhw->chw", select_cls.float(), pred_masks.tensor.float().cuda()) + + r = semseg + blank_area = (r[0] == 0) + pred_mask = r.argmax(dim=0).to('cpu') + pred_mask[blank_area] = 255 + pred_mask = np.array(pred_mask, dtype=int) + pred_mask = cv2.resize(pred_mask, (width, height), interpolation=cv2.INTER_NEAREST) + + vis_output = 
visualizer.draw_sem_seg( + pred_mask + ) + + return None, vis_output + + + +class SAMPointVisualizationDemo(object): + def __init__(self, cfg, granularity, sam2, clip_model ,mask_adapter, instance_mode=ColorMode.IMAGE, parallel=False): + self.metadata = MetadataCatalog.get( + cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" + ) + + self.cpu_device = torch.device("cpu") + self.instance_mode = instance_mode + + self.parallel = parallel + self.granularity = granularity + + + self.sam2 = sam2 + + self.predictor = SAM2ImagePredictor(sam2) + + self.clip_model = clip_model + + self.mask_adapter = mask_adapter + + + from .data.datasets import openseg_classes + + COCO_CATEGORIES_pan = openseg_classes.get_coco_categories_with_prompt_eng() + #COCO_CATEGORIES_seg = openseg_classes.get_coco_stuff_categories_with_prompt_eng() + + thing_classes = [k["name"] for k in COCO_CATEGORIES_pan if k["isthing"] == 1] + stuff_classes = [k["name"] for k in COCO_CATEGORIES_pan] + #print(coco_metadata) + lvis_classes = open("./mask_adapter/data/datasets/lvis_1203_with_prompt_eng.txt", 'r').read().splitlines() + lvis_classes = [x[x.find(':')+1:] for x in lvis_classes] + + self.class_names = thing_classes + stuff_classes + lvis_classes + self.text_embedding = torch.from_numpy(np.load("./text_embedding/lvis_coco_text_embedding.npy")).to("cuda") + + self.class_names = self._load_class_names() + + def _load_class_names(self): + from .data.datasets import openseg_classes + COCO_CATEGORIES_pan = openseg_classes.get_coco_categories_with_prompt_eng() + thing_classes = [k["name"] for k in COCO_CATEGORIES_pan if k["isthing"] == 1] + stuff_classes = [k["name"] for k in COCO_CATEGORIES_pan] + lvis_classes = open("./mask_adapter/data/datasets/lvis_1203_with_prompt_eng.txt", 'r').read().splitlines() + lvis_classes = [x[x.find(':')+1:] for x in lvis_classes] + return thing_classes + stuff_classes + lvis_classes + + + def extract_features_convnext(self, x): + out = {} + x = self.clip_model.visual.trunk.stem(x) + out['stem'] = x.contiguous() # os4 + for i in range(4): + x = self.clip_model.visual.trunk.stages[i](x) + out[f'res{i+2}'] = x.contiguous() # res 2 (os4), 3 (os8), 4 (os16), 5 (os32) + + x = self.clip_model.visual.trunk.norm_pre(x) + out['clip_vis_dense'] = x.contiguous() + return out + + def visual_prediction_forward_convnext(self, x): + batch, num_query, channel = x.shape + x = x.reshape(batch*num_query, channel, 1, 1) # fake 2D input + x = self.clip_model.visual.trunk.head(x) + x = self.clip_model.visual.head(x) + return x.view(batch, num_query, x.shape[-1]) # B x num_queries x 640 + + def visual_prediction_forward_convnext_2d(self, x): + + clip_vis_dense = self.clip_model.visual.trunk.head.norm(x) + clip_vis_dense = self.clip_model.visual.trunk.head.drop(clip_vis_dense.permute(0, 2, 3, 1)) + clip_vis_dense = self.clip_model.visual.head(clip_vis_dense).permute(0, 3, 1, 2) + + return clip_vis_dense + + def run_on_image_with_points(self, ori_image, points): + height, width, _ = ori_image.shape + + image = ori_image + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + ori_image = cv2.cvtColor(ori_image, cv2.COLOR_BGR2RGB) + + input_point = np.array(points) + input_label = np.array([1]) + + with torch.no_grad(): + self.predictor.set_image(image) + masks, _, _ = self.predictor.predict(point_coords=input_point, point_labels=input_label, multimask_output=False) + + pred_masks = BitMasks(masks) + + image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) + + pixel_mean = torch.tensor(PIXEL_MEAN).view(-1, 1, 
1) + pixel_std = torch.tensor(PIXEL_STD).view(-1, 1, 1) + + image = (image - pixel_mean) / pixel_std + image = image.unsqueeze(0) + + # txts = [f'a photo of {cls_name}' for cls_name in self.class_names] + # text = open_clip.tokenize(txts) + + with torch.no_grad(): + self.clip_model.cuda() + # text_features = self.clip_model.encode_text(text.cuda()) + # text_features /= text_features.norm(dim=-1, keepdim=True) + #np.save("/home/yongkangli/Mask-Adapter/text_embedding/lvis_coco_text_embedding.npy", text_features.cpu().numpy()) + text_features = self.text_embedding + features = self.extract_features_convnext(image.cuda().float()) + clip_feature = features['clip_vis_dense'] + + clip_vis_dense = self.visual_prediction_forward_convnext_2d(clip_feature) + + semantic_activation_maps = self.mask_adapter(clip_vis_dense, pred_masks.tensor.unsqueeze(0).float().cuda()) + maps_for_pooling = F.interpolate(semantic_activation_maps, size=clip_feature.shape[-2:], mode='bilinear', align_corners=False) + + B, C = clip_feature.size(0), clip_feature.size(1) + N = maps_for_pooling.size(1) + num_instances = N // 16 + maps_for_pooling = F.softmax(F.logsigmoid(maps_for_pooling).view(B, N,-1), dim=-1) + pooled_clip_feature = torch.bmm(maps_for_pooling, clip_feature.view(B, C, -1).permute(0, 2, 1)) + pooled_clip_feature = self.visual_prediction_forward_convnext(pooled_clip_feature) + pooled_clip_feature = (pooled_clip_feature.reshape(B, num_instances, 16, -1).mean(dim=-2).contiguous()) + + class_preds = (100.0 * pooled_clip_feature @ text_features.T).softmax(dim=-1) + class_preds = class_preds.squeeze(0) + + # Resize mask to match original image size + pred_mask = cv2.resize(masks.squeeze(0), (width, height), interpolation=cv2.INTER_NEAREST) # Resize mask to match original image size + + # Create an overlay for the mask with a transparent background (using alpha transparency) + overlay = ori_image.copy() + mask_colored = np.zeros_like(ori_image) + mask_colored[pred_mask == 1] = [234, 103, 112] # Green color for the mask + + # Apply the mask with transparency (alpha blending) + alpha = 0.5 + cv2.addWeighted(mask_colored, alpha, overlay, 1 - alpha, 0, overlay) + + # Draw boundary (contours) on the overlay + contours, _ = cv2.findContours(pred_mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) + cv2.drawContours(overlay, contours, -1, (255, 255, 255), 2) # White boundary + + # Add label based on the class with the highest score + max_scores, max_score_idx = class_preds.max(dim=1) # Find the max score across the class predictions + label = f"{self.class_names[max_score_idx.item()]}: {max_scores.item():.2f}" + + # Dynamically place the label near the clicked point + text_x = min(width - 200, points[0][0] + 20) # Add some offset from the point + text_y = min(height - 30, points[0][1] + 20) # Ensure the text does not go out of bounds + + # Put text near the point + cv2.putText(overlay, label, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) + + return None, Image.fromarray(overlay) \ No newline at end of file diff --git a/mask_adapter/test_time_augmentation.py b/mask_adapter/test_time_augmentation.py new file mode 100644 index 0000000000000000000000000000000000000000..76794c4589f196680363cc26dfacfbe0dd7c689d --- /dev/null +++ b/mask_adapter/test_time_augmentation.py @@ -0,0 +1,108 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. 
+ +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/test_time_augmentation.py +""" +import copy +import logging +from itertools import count + +import numpy as np +import torch +from fvcore.transforms import HFlipTransform +from torch import nn +from torch.nn.parallel import DistributedDataParallel + +from detectron2.data.detection_utils import read_image +from detectron2.modeling import DatasetMapperTTA + + +__all__ = [ + "SemanticSegmentorWithTTA", +] + + +class SemanticSegmentorWithTTA(nn.Module): + """ + A SemanticSegmentor with test-time augmentation enabled. + Its :meth:`__call__` method has the same interface as :meth:`SemanticSegmentor.forward`. + """ + + def __init__(self, cfg, model, tta_mapper=None, batch_size=1): + """ + Args: + cfg (CfgNode): + model (SemanticSegmentor): a SemanticSegmentor to apply TTA on. + tta_mapper (callable): takes a dataset dict and returns a list of + augmented versions of the dataset dict. Defaults to + `DatasetMapperTTA(cfg)`. + batch_size (int): batch the augmented images into this batch size for inference. + """ + super().__init__() + if isinstance(model, DistributedDataParallel): + model = model.module + self.cfg = cfg.clone() + + self.model = model + + if tta_mapper is None: + tta_mapper = DatasetMapperTTA(cfg) + self.tta_mapper = tta_mapper + self.batch_size = batch_size + + def __call__(self, batched_inputs): + """ + Same input/output format as :meth:`SemanticSegmentor.forward` + """ + + def _maybe_read_image(dataset_dict): + ret = copy.copy(dataset_dict) + if "image" not in ret: + image = read_image(ret.pop("file_name"), self.model.input_format) + image = torch.from_numpy(np.ascontiguousarray(image.transpose(2, 0, 1))) # CHW + ret["image"] = image + if "height" not in ret and "width" not in ret: + ret["height"] = image.shape[1] + ret["width"] = image.shape[2] + return ret + + processed_results = [] + for x in batched_inputs: + result = self._inference_one_image(_maybe_read_image(x)) + processed_results.append(result) + return processed_results + + def _inference_one_image(self, input): + """ + Args: + input (dict): one dataset dict with "image" field being a CHW tensor + Returns: + dict: one output dict + """ + orig_shape = (input["height"], input["width"]) + augmented_inputs, tfms = self._get_augmented_inputs(input) + + final_predictions = None + count_predictions = 0 + for input, tfm in zip(augmented_inputs, tfms): + count_predictions += 1 + with torch.no_grad(): + if final_predictions is None: + if any(isinstance(t, HFlipTransform) for t in tfm.transforms): + final_predictions = self.model([input])[0].pop("sem_seg").flip(dims=[2]) + else: + final_predictions = self.model([input])[0].pop("sem_seg") + else: + if any(isinstance(t, HFlipTransform) for t in tfm.transforms): + final_predictions += self.model([input])[0].pop("sem_seg").flip(dims=[2]) + else: + final_predictions += self.model([input])[0].pop("sem_seg") + + final_predictions = final_predictions / count_predictions + return {"sem_seg": final_predictions} + + def _get_augmented_inputs(self, input): + augmented_inputs = self.tta_mapper(input) + tfms = [x.pop("transforms") for x in augmented_inputs] + return augmented_inputs, tfms diff --git a/mask_adapter/utils/__init__.py b/mask_adapter/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..be75f0cd9568f901b3174ecfb43c0b9f4fa1f77d --- /dev/null +++ b/mask_adapter/utils/__init__.py @@ -0,0 +1,15 @@ +""" +Copyright (2023) Bytedance Ltd. 
and/or its affiliates + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" \ No newline at end of file diff --git a/mask_adapter/utils/__pycache__/__init__.cpython-310.pyc b/mask_adapter/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..46eba9a9eb95435f6fab13b818d35016218ced10 Binary files /dev/null and b/mask_adapter/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/mask_adapter/utils/__pycache__/__init__.cpython-38.pyc b/mask_adapter/utils/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..81ba5c9d803b2aa4f99879c53e5ef4b335f1ceca Binary files /dev/null and b/mask_adapter/utils/__pycache__/__init__.cpython-38.pyc differ diff --git a/mask_adapter/utils/__pycache__/misc.cpython-310.pyc b/mask_adapter/utils/__pycache__/misc.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d43fd81cc0559678a6bfebba5a6d251815cf63c5 Binary files /dev/null and b/mask_adapter/utils/__pycache__/misc.cpython-310.pyc differ diff --git a/mask_adapter/utils/__pycache__/misc.cpython-38.pyc b/mask_adapter/utils/__pycache__/misc.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d170ba8f366e04c12b44aa73e4f75258436b7d25 Binary files /dev/null and b/mask_adapter/utils/__pycache__/misc.cpython-38.pyc differ diff --git a/mask_adapter/utils/misc.py b/mask_adapter/utils/misc.py new file mode 100644 index 0000000000000000000000000000000000000000..1420c9d568b2475dfd0a4bef92010be4015845b5 --- /dev/null +++ b/mask_adapter/utils/misc.py @@ -0,0 +1,114 @@ +""" +This file may have been modified by Bytedance Ltd. and/or its affiliates (“Bytedance's Modifications”). +All Bytedance's Modifications are Copyright (year) Bytedance Ltd. and/or its affiliates. + +Reference: https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/utils/misc.py + +Misc functions, including distributed helpers. + +Mostly copy-paste from torchvision references. 
+""" +from typing import List, Optional + +import torch +import torch.distributed as dist +import torchvision +from torch import Tensor + + +def _max_by_axis(the_list): + # type: (List[List[int]]) -> List[int] + maxes = the_list[0] + for sublist in the_list[1:]: + for index, item in enumerate(sublist): + maxes[index] = max(maxes[index], item) + return maxes + + +class NestedTensor(object): + def __init__(self, tensors, mask: Optional[Tensor]): + self.tensors = tensors + self.mask = mask + + def to(self, device): + # type: (Device) -> NestedTensor # noqa + cast_tensor = self.tensors.to(device) + mask = self.mask + if mask is not None: + assert mask is not None + cast_mask = mask.to(device) + else: + cast_mask = None + return NestedTensor(cast_tensor, cast_mask) + + def decompose(self): + return self.tensors, self.mask + + def __repr__(self): + return str(self.tensors) + + +def nested_tensor_from_tensor_list(tensor_list: List[Tensor]): + # TODO make this more general + if tensor_list[0].ndim == 3: + if torchvision._is_tracing(): + # nested_tensor_from_tensor_list() does not export well to ONNX + # call _onnx_nested_tensor_from_tensor_list() instead + return _onnx_nested_tensor_from_tensor_list(tensor_list) + + # TODO make it support different-sized images + max_size = _max_by_axis([list(img.shape) for img in tensor_list]) + # min_size = tuple(min(s) for s in zip(*[img.shape for img in tensor_list])) + batch_shape = [len(tensor_list)] + max_size + b, c, h, w = batch_shape + dtype = tensor_list[0].dtype + device = tensor_list[0].device + tensor = torch.zeros(batch_shape, dtype=dtype, device=device) + mask = torch.ones((b, h, w), dtype=torch.bool, device=device) + for img, pad_img, m in zip(tensor_list, tensor, mask): + pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + m[: img.shape[1], : img.shape[2]] = False + else: + raise ValueError("not supported") + return NestedTensor(tensor, mask) + + +# _onnx_nested_tensor_from_tensor_list() is an implementation of +# nested_tensor_from_tensor_list() that is supported by ONNX tracing. 
+@torch.jit.unused +def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor: + max_size = [] + for i in range(tensor_list[0].dim()): + max_size_i = torch.max( + torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32) + ).to(torch.int64) + max_size.append(max_size_i) + max_size = tuple(max_size) + + # work around for + # pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) + # m[: img.shape[1], :img.shape[2]] = False + # which is not yet supported in onnx + padded_imgs = [] + padded_masks = [] + for img in tensor_list: + padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))] + padded_img = torch.nn.functional.pad(img, (0, padding[2], 0, padding[1], 0, padding[0])) + padded_imgs.append(padded_img) + + m = torch.zeros_like(img[0], dtype=torch.int, device=img.device) + padded_mask = torch.nn.functional.pad(m, (0, padding[2], 0, padding[1]), "constant", 1) + padded_masks.append(padded_mask.to(torch.bool)) + + tensor = torch.stack(padded_imgs) + mask = torch.stack(padded_masks) + + return NestedTensor(tensor, mask=mask) + + +def is_dist_avail_and_initialized(): + if not dist.is_available(): + return False + if not dist.is_initialized(): + return False + return True diff --git a/text_embedding/lvis_coco_text_embedding.npy b/text_embedding/lvis_coco_text_embedding.npy new file mode 100644 index 0000000000000000000000000000000000000000..cc87f080cad35cf8360299bcabd722e3b6fd903f --- /dev/null +++ b/text_embedding/lvis_coco_text_embedding.npy @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:934d29f264f21160ef124aa9b08be88d25845bd8a1aba14ce691512704a4c671 +size 4350080
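For reference, a minimal standalone sketch of the mask-pooling and cosine-logit computation implemented by MaskPooling and get_classification_logits in this diff, using dummy tensors. The shapes, the thresholding, the 1e-8 denominator, and the logit-scale clamp mirror the code above; the concrete sizes and the 0.5 threshold are illustrative assumptions, not values taken from the repo.

import torch
import torch.nn.functional as F

B, C, H, W, Q, N = 2, 640, 24, 24, 5, 10      # batch, channels, feature map size, masks, classes (illustrative)
x = torch.randn(B, C, H, W)                   # dense CLIP features
mask = torch.rand(B, Q, H, W)                 # predicted mask probabilities

# MaskPooling: zero out pixels below the threshold, then average the features inside each mask
binary_mask = (mask > 0.5).to(mask.dtype)
mask = binary_mask * mask
denorm = mask.sum(dim=(-1, -2), keepdim=True) + 1e-8
pooled = torch.einsum("bchw,bqhw->bqc", x, mask / denorm)                 # [B, Q, C]

# get_classification_logits (single-template case): scaled cosine similarity to text embeddings
text_classifier = F.normalize(torch.randn(N, C), dim=-1)                  # [N, C]
logit_scale = torch.clamp(torch.tensor(4.6052).exp(), max=100)            # exp of CLIP's learnable log-scale, clamped at 100
logits = logit_scale * F.normalize(pooled, dim=-1) @ text_classifier.T    # [B, Q, N]
print(logits.shape)                                                       # torch.Size([2, 5, 10])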