diff --git a/configs/dinov2/dinov2_upernet_water.py b/configs/dinov2/dinov2_upernet_water.py new file mode 100644 index 0000000000000000000000000000000000000000..5567426726af46f1bb3ee9ccadd4863c476396a7 --- /dev/null +++ b/configs/dinov2/dinov2_upernet_water.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/dinov2_upernet.py", + "../_base_/datasets/water.py", + "../_base_/default_runtime.py", + "../_base_/schedules/water_schedule.py", +] + +data_preprocessor = dict(size=(512, 512)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6) +) diff --git a/configs/ktda/dinov2_b_frozen-fam-fmm.py b/configs/ktda/dinov2_b_frozen-fam-fmm.py new file mode 100644 index 0000000000000000000000000000000000000000..dba0f12e601607be2fa9cc215db8a426eb45651e --- /dev/null +++ b/configs/ktda/dinov2_b_frozen-fam-fmm.py @@ -0,0 +1,18 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict( + num_classes=5, + ), + auxiliary_head=dict( + num_classes=5, + ), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), +) diff --git a/configs/ktda/dinov2_b_frozen-fam.py b/configs/ktda/dinov2_b_frozen-fam.py new file mode 100644 index 0000000000000000000000000000000000000000..a70f88f8d72e92be48979dd57845e63aca10b63b --- /dev/null +++ b/configs/ktda/dinov2_b_frozen-fam.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5) +) diff --git a/configs/ktda/experiment_a.py b/configs/ktda/experiment_a.py new file mode 100644 index 0000000000000000000000000000000000000000..4fce7f1e1879324ece01dd4f5a8c760986abedf8 --- /dev/null +++ b/configs/ktda/experiment_a.py @@ -0,0 +1,14 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + student_training=False, + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5) +) diff --git a/configs/ktda/experiment_aa.py b/configs/ktda/experiment_aa.py new file mode 100644 index 0000000000000000000000000000000000000000..9261127961bd712c6f5da049c81c55f6ae1e0d64 --- /dev/null +++ b/configs/ktda/experiment_aa.py @@ -0,0 +1,46 @@ +_base_ = [ + "../_base_/models/convnextv2_femto_vit_segformer_vegseg.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + teach_backbone=dict( + type="mmpretrain.VisionTransformer", + arch="large", + frozen_stages=24, + img_size=256, + patch_size=14, + layer_scale_init_value=1e-5, + out_indices=(7, 11, 15, 23), + out_type="featmap", + init_cfg=dict( + type="Pretrained", + checkpoint="checkpoints/dinov2-large.pth", + prefix="backbone", + ), + ), + fam=dict(out_channels=1024), + decode_head=dict(in_channels=[1024, 1024, 1024, 1024], num_classes=5), + data_preprocessor=data_preprocessor, + auxiliary_head=[ + dict( + type="FCNHead", + in_channels=1024, + 
in_index=i, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type="SyncBN", requires_grad=True), + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ) + for i in range(4) + ], +) diff --git a/configs/ktda/experiment_k.py b/configs/ktda/experiment_k.py new file mode 100644 index 0000000000000000000000000000000000000000..07902e0cad4f8686042e93da2ac1016bbaaf4925 --- /dev/null +++ b/configs/ktda/experiment_k.py @@ -0,0 +1,14 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768],mlp_nums=4), +) diff --git a/configs/ktda/experiment_u.py b/configs/ktda/experiment_u.py new file mode 100644 index 0000000000000000000000000000000000000000..ed619c7953c844c868153bef1a7e2a9f44181819 --- /dev/null +++ b/configs/ktda/experiment_u.py @@ -0,0 +1,15 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + neck=None, + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), +) diff --git a/configs/ktda/experiment_v.py b/configs/ktda/experiment_v.py new file mode 100644 index 0000000000000000000000000000000000000000..ccd3bf394cb43393537558f251254130649c5aad --- /dev/null +++ b/configs/ktda/experiment_v.py @@ -0,0 +1,26 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict( + _delete_=True, + type="SegformerHead", + in_channels=[768, 768, 768, 768], + in_index=[0, 1, 2, 3], + channels=256, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type="SyncBN", requires_grad=True), + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict(num_classes=5), + neck=None, + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), +) diff --git a/configs/ktda/ktda_grass.py b/configs/ktda/ktda_grass.py new file mode 100644 index 0000000000000000000000000000000000000000..edb64e1bd48f1ee25d8e57dc0673eedd49435aac --- /dev/null +++ b/configs/ktda/ktda_grass.py @@ -0,0 +1,19 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict( + type="FMM", + in_channels=[768, 768, 768, 768], + model_type="vitBlock", + mlp_nums=4, + ), +) diff --git a/configs/pspnet/pspnet_r101_water.py b/configs/pspnet/pspnet_r101_water.py new file mode 100644 index 0000000000000000000000000000000000000000..18b71863989a32c639f6d28a5b24fb0eec325fce --- /dev/null +++ b/configs/pspnet/pspnet_r101_water.py @@ -0,0 +1,15 @@ +_base_ = [ + "../_base_/models/pspnet_r50-d8.py", + 
"../_base_/datasets/water.py", + "../_base_/default_runtime.py", + "../_base_/schedules/water_schedule.py", +] + +data_preprocessor = dict(size=(512, 512)) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101), + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6) +) \ No newline at end of file diff --git a/configs/pspnet/pspnet_r50.py b/configs/pspnet/pspnet_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..6241fcbc5cd63164e5675046287f943fa3bbfc39 --- /dev/null +++ b/configs/pspnet/pspnet_r50.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/pspnet_r50-d8.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5) +) \ No newline at end of file diff --git a/configs/segformer/segformer_mit-b0_water.py b/configs/segformer/segformer_mit-b0_water.py new file mode 100644 index 0000000000000000000000000000000000000000..ed4fc242c1d3546e6238d9119d56ceb69307f387 --- /dev/null +++ b/configs/segformer/segformer_mit-b0_water.py @@ -0,0 +1,14 @@ +_base_ = [ + "../_base_/models/segformer_mit-b0.py", + "../_base_/datasets/water.py", + "../_base_/default_runtime.py", + "../_base_/schedules/water_schedule.py", +] + +data_preprocessor = dict(size=(512, 512)) +checkpoint = "https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth" # noqa +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict(init_cfg=dict(type="Pretrained", checkpoint=checkpoint)), + decode_head=dict(num_classes=6), +) diff --git a/ktda/datasets/__init__.py b/ktda/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e0a8cf71cb62c4d1d727b2af7247b48a54674290 --- /dev/null +++ b/ktda/datasets/__init__.py @@ -0,0 +1,7 @@ +from .grass import GrassDataset +from .l8_biome import L8BIOMEDataset + +__all__ = [ + "GrassDataset", + "L8BIOMEDataset" +] diff --git a/ktda/datasets/grass.py b/ktda/datasets/grass.py new file mode 100644 index 0000000000000000000000000000000000000000..2d231fea86aa620f3c8e41ea40609cf233c0f209 --- /dev/null +++ b/ktda/datasets/grass.py @@ -0,0 +1,55 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +from typing import List + +import mmengine.fileio as fileio + +from mmseg.registry import DATASETS +from mmseg.datasets import BaseSegDataset + + +@DATASETS.register_module() +class GrassDataset(BaseSegDataset): + """grass segmentation dataset. The file structure should be. + + .. code-block:: none + + ├── data + │ ├── grass + │ │ ├── img_dir + │ │ │ ├── train + │ │ │ │ ├──0.tif + │ │ │ │ ├──... + │ │ │ ├── val + │ │ │ │ ├──9.tif + │ │ │ │ ├──... + │ │ ├── ann_dir + │ │ │ ├── train + │ │ │ │ ├──0.png + │ │ │ │ ├──... + │ │ │ ├── val + │ │ │ │ ├──9.png + │ │ │ │ ├──... 
+ """ + + METAINFO = dict( + classes=("low", "middle-low", "middle", "middle-high", "high"), + palette=[ + [185, 101, 71], + [248, 202, 155], + [211, 232, 158], + [138, 191, 104], + [92, 144, 77], + ], + ) + + def __init__(self, + img_suffix='.tif', + seg_map_suffix='.png', + reduce_zero_label=False, + **kwargs) -> None: + super().__init__( + img_suffix=img_suffix, + seg_map_suffix=seg_map_suffix, + reduce_zero_label=reduce_zero_label, + **kwargs) \ No newline at end of file diff --git a/ktda/datasets/l8_biome.py b/ktda/datasets/l8_biome.py new file mode 100644 index 0000000000000000000000000000000000000000..ee51c6d1f0536685fa2469a7c661aef3c210adbd --- /dev/null +++ b/ktda/datasets/l8_biome.py @@ -0,0 +1,29 @@ +from mmseg.registry import DATASETS +from mmseg.datasets import BaseSegDataset + + +@DATASETS.register_module() +class L8BIOMEDataset(BaseSegDataset): + METAINFO = dict( + classes=("Clear", "Cloud Shadow", "Thin Cloud", "Cloud"), + palette=[ + [79, 253, 199], + [221, 53, 223], + [251, 255, 41], + [77, 2, 115], + ], + ) + + def __init__( + self, + img_suffix=".png", + seg_map_suffix=".png", + reduce_zero_label=False, + **kwargs + ) -> None: + super().__init__( + img_suffix=img_suffix, + seg_map_suffix=seg_map_suffix, + reduce_zero_label=reduce_zero_label, + **kwargs + ) \ No newline at end of file diff --git a/ktda/models/__init__.py b/ktda/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..432ad1d998fe360d627fa49ddc0d8dd2b8aa8c70 --- /dev/null +++ b/ktda/models/__init__.py @@ -0,0 +1,4 @@ +from .segmentors import DistillEncoderDecoder +from .adapter import FAM,FMM + +__all__ = ["DistillEncoderDecoder", "FAM","FMM"] diff --git a/ktda/models/__pycache__/__init__.cpython-311.pyc b/ktda/models/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..478b796670b7c2acbe226fc34234aad057db6645 Binary files /dev/null and b/ktda/models/__pycache__/__init__.cpython-311.pyc differ diff --git a/ktda/models/adapter/__init__.py b/ktda/models/adapter/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..449562bd04c7746b42b2e4037f3ac079427b7f11 --- /dev/null +++ b/ktda/models/adapter/__init__.py @@ -0,0 +1,4 @@ +from .fam import FAM +from .fmm import FMM + +__all__ = ["FAM", "FMM"] diff --git a/ktda/models/adapter/__pycache__/__init__.cpython-311.pyc b/ktda/models/adapter/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cc98ac90964c8c7ed2f2505abab8b7638866fe16 Binary files /dev/null and b/ktda/models/adapter/__pycache__/__init__.cpython-311.pyc differ diff --git a/ktda/models/adapter/__pycache__/fam.cpython-311.pyc b/ktda/models/adapter/__pycache__/fam.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b3614ee1f36c02b39742ec6a0d2ef0137d3b21c8 Binary files /dev/null and b/ktda/models/adapter/__pycache__/fam.cpython-311.pyc differ diff --git a/ktda/models/adapter/__pycache__/fmm.cpython-311.pyc b/ktda/models/adapter/__pycache__/fmm.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12af7182c8cdbfedea7d088a01af7339793ed311 Binary files /dev/null and b/ktda/models/adapter/__pycache__/fmm.cpython-311.pyc differ diff --git a/ktda/models/adapter/fam.py b/ktda/models/adapter/fam.py new file mode 100644 index 0000000000000000000000000000000000000000..7bdca8fbd96e84207c54f13479ca2808fe6a7702 --- /dev/null +++ b/ktda/models/adapter/fam.py @@ -0,0 +1,37 @@ +from mmseg.registry 
import MODELS +from mmengine.model import BaseModule +from torch import nn as nn +from torch.nn import functional as F +from timm.models.layers import trunc_normal_ + + +@MODELS.register_module() +class FAM(BaseModule): + def __init__(self, in_channels, out_channels, output_size,init_cfg=None): + super().__init__(init_cfg) + self.convert = nn.ModuleList() + self.output_size = output_size + if isinstance(out_channels, int): + out_channels = [out_channels] * len(in_channels) + for in_channel, out_channel in zip(in_channels, out_channels): + self.convert.append( + nn.Conv2d(in_channel, out_channel, kernel_size=1), + ) + + self.apply(self._init_weights) + + def _init_weights(self, m): + if isinstance(m, (nn.Conv2d, nn.Linear)): + trunc_normal_(m.weight, std=.02) + nn.init.constant_(m.bias, 0) + + + def forward(self, inputs): + outs = [] + for index, x in enumerate(inputs): + x = self.convert[index](x) + x = F.interpolate( + x, size=(self.output_size,self.output_size), align_corners=False, mode="bilinear" + ) + outs.append(x) + return tuple(outs) diff --git a/ktda/models/adapter/fmm.py b/ktda/models/adapter/fmm.py new file mode 100644 index 0000000000000000000000000000000000000000..cce5329feae30cd663effbd1ce383d925a1288e6 --- /dev/null +++ b/ktda/models/adapter/fmm.py @@ -0,0 +1,109 @@ +from mmseg.registry import MODELS +from mmengine.model import BaseModule +from torch import nn as nn +from torch.nn import functional as F +from typing import Callable, Optional +from torch import Tensor +from timm.models.layers import trunc_normal_ +from timm.models.vision_transformer import Block as TransformerBlock + + +class Mlp(nn.Module): + def __init__( + self, + in_features: int, + hidden_features: Optional[int] = None, + out_features: Optional[int] = None, + act_layer: Callable[..., nn.Module] = nn.GELU, + drop: float = 0.0, + bias: bool = True, + ) -> None: + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features, bias=bias) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features, bias=bias) + self.drop = nn.Dropout(drop) + + def forward(self, x: Tensor) -> Tensor: + x = self.fc1(x) + x = self.act(x) + x = self.drop(x) + x = self.fc2(x) + x = self.drop(x) + return x + + +@MODELS.register_module() +class FMM(BaseModule): + def __init__( + self, + in_channels, + rank_dim=4, + mlp_nums=1, + model_type="mlp", + num_heads=8, + mlp_ratio=4, + qkv_bias=True, + qk_norm=False, + init_values=None, + proj_drop_rate: float = 0.0, + attn_drop_rate: float = 0.0, + init_cfg=None, + ): + super().__init__(init_cfg) + self.adapters = nn.ModuleList() + if model_type == "mlp": + for in_channel in in_channels: + mlp_list = [] + for _ in range(mlp_nums): + mlp_list.append( + Mlp( + in_channel, + hidden_features=in_channel // rank_dim, + out_features=in_channel, + ) + ) + mlp_model = nn.Sequential(*mlp_list) + self.adapters.append(mlp_model) + + elif model_type == "vitBlock": + for in_channel in in_channels: + model_list = [] + for _ in range(mlp_nums): + model_list.append( + TransformerBlock( + in_channel, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=qkv_bias, + qk_norm=qk_norm, + init_values=init_values, + proj_drop=proj_drop_rate, + attn_drop=attn_drop_rate, + ) + ) + self.adapters.append(nn.Sequential(*model_list)) + + else: + raise ValueError(f"model type must in ['mlp','vitBlock'],actually is {model_type}") + + self.apply(self._init_weights) + + def _init_weights(self, m): + 
if isinstance(m, (nn.Conv2d, nn.Linear)): + trunc_normal_(m.weight, std=0.02) + nn.init.constant_(m.bias, 0) + + def forward(self, inputs): + outs = [] + for index, x in enumerate(inputs): + B, C, H, W = x.shape + x = x.permute(0, 2, 3, 1) + x = x.reshape(B, -1, C) + x = self.adapters[index](x) + x = x.reshape(B, H, W, C) + x = x.permute(0, 3, 1, 2) + outs.append(x) + return tuple(outs) diff --git a/ktda/models/segmentors/__pycache__/__init__.cpython-311.pyc b/ktda/models/segmentors/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8dfc14588da00dc5947602facdf53f259d4079c0 Binary files /dev/null and b/ktda/models/segmentors/__pycache__/__init__.cpython-311.pyc differ diff --git a/ktda/models/segmentors/__pycache__/distill_encoder_decoder.cpython-311.pyc b/ktda/models/segmentors/__pycache__/distill_encoder_decoder.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..17ba771d0ff97cd7479c4c71edba66fd78d75dbe Binary files /dev/null and b/ktda/models/segmentors/__pycache__/distill_encoder_decoder.cpython-311.pyc differ diff --git a/ktda/models/segmentors/distill_encoder_decoder.py b/ktda/models/segmentors/distill_encoder_decoder.py new file mode 100644 index 0000000000000000000000000000000000000000..b660b4abd565e66e866fdcb7540ddc6e3fee9ded --- /dev/null +++ b/ktda/models/segmentors/distill_encoder_decoder.py @@ -0,0 +1,382 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import logging +from typing import List, Optional + +import torch.nn as nn +import torch.nn.functional as F +from mmengine.logging import print_log +from torch import Tensor + +from mmseg.registry import MODELS +from mmseg.utils import ( + ConfigType, + OptConfigType, + OptMultiConfig, + OptSampleList, + SampleList, + add_prefix, +) +from mmseg.models import BaseSegmentor + + +@MODELS.register_module() +class DistillEncoderDecoder(BaseSegmentor): + + def __init__( + self, + backbone: ConfigType, + teach_backbone: ConfigType, + decode_head: ConfigType, + neck: OptConfigType = None, + auxiliary_head: OptConfigType = None, + fam: OptConfigType = None, + fmm: OptConfigType = None, + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + data_preprocessor: OptConfigType = None, + pretrained: Optional[str] = None, + student_training=True, + temperature=1.0, + alpha=0.5, + fuse=False, + init_cfg: OptMultiConfig = None, + ): + super().__init__(data_preprocessor=data_preprocessor, init_cfg=init_cfg) + + self.temperature = temperature + self.alpha = alpha + self.student_training = student_training + self.fuse = fuse + + if pretrained is not None: + assert ( + backbone.get("pretrained") is None + ), "both backbone and segmentor set pretrained weight" + assert ( + teach_backbone.get("pretrained") is None + ), "both teach backbone and segmentor set pretrained weight" + backbone.pretrained = pretrained + teach_backbone.pretrained = pretrained + self.backbone = MODELS.build(backbone) + self.teach_backbone = MODELS.build(teach_backbone) + if neck is not None: + self.neck = MODELS.build(neck) + + self.fam = nn.Identity() + self.fmm = nn.Identity() + if fam is not None: + self.fam = MODELS.build(fam) + if fmm is not None: + self.fmm = MODELS.build(fmm) + self._init_decode_head(decode_head) + self._init_auxiliary_head(auxiliary_head) + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + + assert self.with_decode_head + + def _init_decode_head(self, decode_head: ConfigType) -> None: + """Initialize ``decode_head``""" + self.decode_head = 
MODELS.build(decode_head) + self.align_corners = self.decode_head.align_corners + self.num_classes = self.decode_head.num_classes + self.out_channels = self.decode_head.out_channels + + def _init_auxiliary_head(self, auxiliary_head: ConfigType) -> None: + """Initialize ``auxiliary_head``""" + if auxiliary_head is not None: + if isinstance(auxiliary_head, list): + self.auxiliary_head = nn.ModuleList() + for head_cfg in auxiliary_head: + self.auxiliary_head.append(MODELS.build(head_cfg)) + else: + self.auxiliary_head = MODELS.build(auxiliary_head) + + def fuse_features(self,features): + x = features[0] + for index,feature in enumerate(features): + if index == 0: + continue + x += feature + x = [x] + return tuple(x) + + def extract_feat(self, inputs: Tensor) -> List[Tensor]: + """Extract features from images.""" + x = self.backbone(inputs) + x = self.fam(x) + if self.fuse: + x = self.fuse_features(x) + if self.with_neck: + x = self.neck(x) + x = self.fmm(x) + return x + + def encode_decode(self, inputs: Tensor, batch_img_metas: List[dict]) -> Tensor: + """Encode images with backbone and decode into a semantic segmentation + map of the same size as input.""" + x = self.extract_feat(inputs) + seg_logits = self.decode_head.predict(x, batch_img_metas, self.test_cfg) + + return seg_logits + + def _decode_head_forward_train( + self, inputs: List[Tensor], data_samples: SampleList + ) -> dict: + """Run forward function and calculate loss for decode head in + training.""" + losses = dict() + loss_decode = self.decode_head.loss(inputs, data_samples, self.train_cfg) + + losses.update(add_prefix(loss_decode, "decode")) + return losses + + def _auxiliary_head_forward_train( + self, inputs: List[Tensor], data_samples: SampleList + ) -> dict: + """Run forward function and calculate loss for auxiliary head in + training.""" + losses = dict() + if isinstance(self.auxiliary_head, nn.ModuleList): + for idx, aux_head in enumerate(self.auxiliary_head): + loss_aux = aux_head.loss(inputs, data_samples, self.train_cfg) + for key in loss_aux.keys(): + loss_aux[key] = loss_aux[key] / len(self.auxiliary_head) + losses.update(add_prefix(loss_aux, f"aux_{idx}")) + else: + loss_aux = self.auxiliary_head.loss(inputs, data_samples, self.train_cfg) + losses.update(add_prefix(loss_aux, "aux")) + + return losses + + def calculate_diltill_loss(self, inputs): + student_feats = self.backbone(inputs) + student_feats = self.fam(student_feats) + teach_feats = self.teach_backbone(inputs) + + if self.fuse: + student_feats = self.fuse_features(student_feats) + teach_feats = self.fuse_features(teach_feats) + + total_loss = 0.0 + for student_feat, teach_feat in zip(student_feats, teach_feats): + student_prob = F.softmax(student_feat / self.temperature, dim=-1) + teach_prob = F.softmax(teach_feat / self.temperature, dim=-1) + kl_loss = F.kl_div( + student_prob.log(), teach_prob, reduction="batchmean" + ) * (self.temperature**2) + mse_loss = F.mse_loss(student_feat, teach_feat, reduction="mean") + loss = self.alpha * kl_loss + (1 - self.alpha) * mse_loss + total_loss += loss + + avg_loss = total_loss / len(student_feats) + if self.alpha == 0: + avg_loss = avg_loss * 0.5 + return avg_loss + + def loss(self, inputs: Tensor, data_samples: SampleList) -> dict: + """Calculate losses from a batch of inputs and data samples. + + Args: + inputs (Tensor): Input images. + data_samples (list[:obj:`SegDataSample`]): The seg data samples. + It usually includes information such as `metainfo` and + `gt_sem_seg`. 
+ + Returns: + dict[str, Tensor]: a dictionary of loss components + """ + + x = self.extract_feat(inputs) + + losses = dict() + + loss_decode = self._decode_head_forward_train(x, data_samples) + losses.update(loss_decode) + if self.student_training: + losses["distill_loss"] = self.calculate_diltill_loss(inputs) + if self.with_auxiliary_head: + loss_aux = self._auxiliary_head_forward_train(x, data_samples) + losses.update(loss_aux) + + return losses + + def predict(self, inputs: Tensor, data_samples: OptSampleList = None) -> SampleList: + """Predict results from a batch of inputs and data samples with post- + processing. + + Args: + inputs (Tensor): Inputs with shape (N, C, H, W). + data_samples (List[:obj:`SegDataSample`], optional): The seg data + samples. It usually includes information such as `metainfo` + and `gt_sem_seg`. + + Returns: + list[:obj:`SegDataSample`]: Segmentation results of the + input images. Each SegDataSample usually contain: + + - ``pred_sem_seg``(PixelData): Prediction of semantic segmentation. + - ``seg_logits``(PixelData): Predicted logits of semantic + segmentation before normalization. + """ + if data_samples is not None: + batch_img_metas = [data_sample.metainfo for data_sample in data_samples] + else: + batch_img_metas = [ + dict( + ori_shape=inputs.shape[2:], + img_shape=inputs.shape[2:], + pad_shape=inputs.shape[2:], + padding_size=[0, 0, 0, 0], + ) + ] * inputs.shape[0] + + seg_logits = self.inference(inputs, batch_img_metas) + + return self.postprocess_result(seg_logits, data_samples) + + def _forward(self, inputs: Tensor, data_samples: OptSampleList = None) -> Tensor: + """Network forward process. + + Args: + inputs (Tensor): Inputs with shape (N, C, H, W). + data_samples (List[:obj:`SegDataSample`]): The seg + data samples. It usually includes information such + as `metainfo` and `gt_sem_seg`. + + Returns: + Tensor: Forward output of model without any post-processes. + """ + x = self.extract_feat(inputs) + return self.decode_head.forward(x) + + def slide_inference(self, inputs: Tensor, batch_img_metas: List[dict]) -> Tensor: + """Inference by sliding-window with overlap. + + If h_crop > h_img or w_crop > w_img, the small patch will be used to + decode without padding. + + Args: + inputs (tensor): the tensor should have a shape NxCxHxW, + which contains all images in the batch. + batch_img_metas (List[dict]): List of image metainfo where each may + also contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', and 'pad_shape'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + + Returns: + Tensor: The segmentation results, seg_logits from model of each + input image. 
+ """ + + h_stride, w_stride = self.test_cfg.stride + h_crop, w_crop = self.test_cfg.crop_size + batch_size, _, h_img, w_img = inputs.size() + out_channels = self.out_channels + h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1 + w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1 + preds = inputs.new_zeros((batch_size, out_channels, h_img, w_img)) + count_mat = inputs.new_zeros((batch_size, 1, h_img, w_img)) + for h_idx in range(h_grids): + for w_idx in range(w_grids): + y1 = h_idx * h_stride + x1 = w_idx * w_stride + y2 = min(y1 + h_crop, h_img) + x2 = min(x1 + w_crop, w_img) + y1 = max(y2 - h_crop, 0) + x1 = max(x2 - w_crop, 0) + crop_img = inputs[:, :, y1:y2, x1:x2] + # change the image shape to patch shape + batch_img_metas[0]["img_shape"] = crop_img.shape[2:] + # the output of encode_decode is seg logits tensor map + # with shape [N, C, H, W] + crop_seg_logit = self.encode_decode(crop_img, batch_img_metas) + preds += F.pad( + crop_seg_logit, + ( + int(x1), + int(preds.shape[3] - x2), + int(y1), + int(preds.shape[2] - y2), + ), + ) + + count_mat[:, :, y1:y2, x1:x2] += 1 + assert (count_mat == 0).sum() == 0 + seg_logits = preds / count_mat + + return seg_logits + + def whole_inference(self, inputs: Tensor, batch_img_metas: List[dict]) -> Tensor: + """Inference with full image. + + Args: + inputs (Tensor): The tensor should have a shape NxCxHxW, which + contains all images in the batch. + batch_img_metas (List[dict]): List of image metainfo where each may + also contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', and 'pad_shape'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + + Returns: + Tensor: The segmentation results, seg_logits from model of each + input image. + """ + + seg_logits = self.encode_decode(inputs, batch_img_metas) + + return seg_logits + + def inference(self, inputs: Tensor, batch_img_metas: List[dict]) -> Tensor: + """Inference with slide/whole style. + + Args: + inputs (Tensor): The input image of shape (N, 3, H, W). + batch_img_metas (List[dict]): List of image metainfo where each may + also contain: 'img_shape', 'scale_factor', 'flip', 'img_path', + 'ori_shape', 'pad_shape', and 'padding_size'. + For details on the values of these keys see + `mmseg/datasets/pipelines/formatting.py:PackSegInputs`. + + Returns: + Tensor: The segmentation results, seg_logits from model of each + input image. + """ + assert self.test_cfg.get("mode", "whole") in ["slide", "whole"], ( + f'Only "slide" or "whole" test mode are supported, but got ' + f'{self.test_cfg["mode"]}.' + ) + ori_shape = batch_img_metas[0]["ori_shape"] + if not all(_["ori_shape"] == ori_shape for _ in batch_img_metas): + print_log( + "Image shapes are different in the batch.", + logger="current", + level=logging.WARN, + ) + if self.test_cfg.mode == "slide": + seg_logit = self.slide_inference(inputs, batch_img_metas) + else: + seg_logit = self.whole_inference(inputs, batch_img_metas) + + return seg_logit + + def aug_test(self, inputs, batch_img_metas, rescale=True): + """Test with augmentations. + + Only rescale=True is supported. 
+ """ + # aug_test rescale all imgs back to ori_shape for now + assert rescale + # to save memory, we get augmented seg logit inplace + seg_logit = self.inference(inputs[0], batch_img_metas[0], rescale) + for i in range(1, len(inputs)): + cur_seg_logit = self.inference(inputs[i], batch_img_metas[i], rescale) + seg_logit += cur_seg_logit + seg_logit /= len(inputs) + seg_pred = seg_logit.argmax(dim=1) + # unravel batch dim + seg_pred = list(seg_pred) + return seg_pred diff --git a/requirements/docs.txt b/requirements/docs.txt new file mode 100644 index 0000000000000000000000000000000000000000..19632d36aba506859c4dab152a54ca3bb459d757 --- /dev/null +++ b/requirements/docs.txt @@ -0,0 +1,7 @@ +docutils==0.16.0 +myst-parser +-e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme +sphinx==4.0.2 +sphinx_copybutton +sphinx_markdown_tables +urllib3<2.0.0 diff --git a/requirements/optional.txt b/requirements/optional.txt new file mode 100644 index 0000000000000000000000000000000000000000..b0310f52960e40d5bd98c3ad1608d0249b7ccf9f --- /dev/null +++ b/requirements/optional.txt @@ -0,0 +1,22 @@ +cityscapesscripts +-e git+https://github.com/openai/CLIP.git@main#egg=clip + +# for vpd model +diffusers +einops==0.3.0 +imageio==2.9.0 +imageio-ffmpeg==0.4.2 +invisible-watermark +kornia==0.6 +-e git+https://github.com/CompVis/stable-diffusion@21f890f#egg=latent-diffusion +nibabel +omegaconf==2.1.1 +pudb==2019.2 +pytorch-lightning==1.4.2 +streamlit>=0.73.1 +-e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers +test-tube>=0.7.5 +timm +torch-fidelity==0.3.0 +torchmetrics==0.6.0 +transformers==4.19.2 diff --git a/requirements/runtime.txt b/requirements/runtime.txt new file mode 100644 index 0000000000000000000000000000000000000000..3e242581e997733262c5cfbaf5bd76c9a4756745 --- /dev/null +++ b/requirements/runtime.txt @@ -0,0 +1,5 @@ +matplotlib +numpy +packaging +prettytable +scipy diff --git a/tools/analysis_tools/analyze_logs.py b/tools/analysis_tools/analyze_logs.py new file mode 100644 index 0000000000000000000000000000000000000000..7464d231621b17249ce69f358479bbba42757362 --- /dev/null +++ b/tools/analysis_tools/analyze_logs.py @@ -0,0 +1,130 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""Modified from https://github.com/open- +mmlab/mmdetection/blob/master/tools/analysis_tools/analyze_logs.py.""" +import argparse +import json +from collections import defaultdict + +import matplotlib.pyplot as plt +import seaborn as sns + + +def plot_curve(log_dicts, args): + if args.backend is not None: + plt.switch_backend(args.backend) + sns.set_style(args.style) + # if legend is None, use {filename}_{key} as legend + legend = args.legend + if legend is None: + legend = [] + for json_log in args.json_logs: + for metric in args.keys: + legend.append(f'{json_log}_{metric}') + assert len(legend) == (len(args.json_logs) * len(args.keys)) + metrics = args.keys + + num_metrics = len(metrics) + for i, log_dict in enumerate(log_dicts): + epochs = list(log_dict.keys()) + for j, metric in enumerate(metrics): + print(f'plot curve of {args.json_logs[i]}, metric is {metric}') + plot_epochs = [] + plot_iters = [] + plot_values = [] + # In some log files exist lines of validation, + # `mode` list is used to only collect iter number + # of training line. 
+ for epoch in epochs: + epoch_logs = log_dict[epoch] + if metric not in epoch_logs.keys(): + continue + if metric in ['mIoU', 'mAcc', 'aAcc']: + plot_epochs.append(epoch) + plot_values.append(epoch_logs[metric][0]) + else: + for idx in range(len(epoch_logs[metric])): + plot_iters.append(epoch_logs['step'][idx]) + plot_values.append(epoch_logs[metric][idx]) + ax = plt.gca() + label = legend[i * num_metrics + j] + if metric in ['mIoU', 'mAcc', 'aAcc']: + ax.set_xticks(plot_epochs) + plt.xlabel('step') + plt.plot(plot_epochs, plot_values, label=label, marker='o') + else: + plt.xlabel('iter') + plt.plot(plot_iters, plot_values, label=label, linewidth=0.5) + plt.legend() + if args.title is not None: + plt.title(args.title) + if args.out is None: + plt.show() + else: + print(f'save curve to: {args.out}') + plt.savefig(args.out) + plt.cla() + + +def parse_args(): + parser = argparse.ArgumentParser(description='Analyze Json Log') + parser.add_argument( + 'json_logs', + type=str, + nargs='+', + help='path of train log in json format') + parser.add_argument( + '--keys', + type=str, + nargs='+', + default=['mIoU'], + help='the metric that you want to plot') + parser.add_argument('--title', type=str, help='title of figure') + parser.add_argument( + '--legend', + type=str, + nargs='+', + default=None, + help='legend of each plot') + parser.add_argument( + '--backend', type=str, default=None, help='backend of plt') + parser.add_argument( + '--style', type=str, default='dark', help='style of plt') + parser.add_argument('--out', type=str, default=None) + args = parser.parse_args() + return args + + +def load_json_logs(json_logs): + # load and convert json_logs to log_dict, key is step, value is a sub dict + # keys of sub dict is different metrics + # value of sub dict is a list of corresponding values of all iterations + log_dicts = [dict() for _ in json_logs] + prev_step = 0 + for json_log, log_dict in zip(json_logs, log_dicts): + with open(json_log) as log_file: + for line in log_file: + log = json.loads(line.strip()) + # the final step in json file is 0. + if 'step' in log and log['step'] != 0: + step = log['step'] + prev_step = step + else: + step = prev_step + if step not in log_dict: + log_dict[step] = defaultdict(list) + for k, v in log.items(): + log_dict[step][k].append(v) + return log_dicts + + +def main(): + args = parse_args() + json_logs = args.json_logs + for json_log in json_logs: + assert json_log.endswith('.json') + log_dicts = load_json_logs(json_logs) + plot_curve(log_dicts, args) + + +if __name__ == '__main__': + main() diff --git a/tools/analysis_tools/benchmark.py b/tools/analysis_tools/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..afaeabac85fa642b03c006b8a920c0d95d4cb400 --- /dev/null +++ b/tools/analysis_tools/benchmark.py @@ -0,0 +1,121 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
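# --- Illustrative sketch (not part of the diff above) ---------------------
# analyze_logs.py above consumes MMEngine-style JSON-lines training logs,
# where each line is one dict carrying either training metrics keyed by
# 'step' or validation metrics such as 'mIoU'/'mAcc'/'aAcc'. A minimal,
# hypothetical log and grouping logic mirroring load_json_logs:
import json
from collections import defaultdict

sample_log = [
    '{"lr": 0.0001, "loss": 0.82, "step": 50}',
    '{"lr": 0.0001, "loss": 0.64, "step": 100}',
    '{"mIoU": 0.41, "mAcc": 0.55, "aAcc": 0.87, "step": 0}',
]

log_dict, prev_step = defaultdict(lambda: defaultdict(list)), 0
for line in sample_log:
    log = json.loads(line)
    # validation lines carry step == 0, so reuse the last training step
    step = log["step"] if log.get("step", 0) != 0 else prev_step
    prev_step = step
    for k, v in log.items():
        log_dict[step][k].append(v)
print(dict(log_dict))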
+import argparse +import os.path as osp +import time + +import numpy as np +import torch +from mmengine import Config +from mmengine.fileio import dump +from mmengine.model.utils import revert_sync_batchnorm +from mmengine.registry import init_default_scope +from mmengine.runner import Runner, load_checkpoint +from mmengine.utils import mkdir_or_exist + +from mmseg.registry import MODELS + + +def parse_args(): + parser = argparse.ArgumentParser(description='MMSeg benchmark a model') + parser.add_argument('config', help='test config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument( + '--log-interval', type=int, default=50, help='interval of logging') + parser.add_argument( + '--work-dir', + help=('if specified, the results will be dumped ' + 'into the directory as json')) + parser.add_argument('--repeat-times', type=int, default=1) + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + cfg = Config.fromfile(args.config) + + init_default_scope(cfg.get('default_scope', 'mmseg')) + + timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) + if args.work_dir is not None: + mkdir_or_exist(osp.abspath(args.work_dir)) + json_file = osp.join(args.work_dir, f'fps_{timestamp}.json') + else: + # use config filename as default work_dir if cfg.work_dir is None + work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + mkdir_or_exist(osp.abspath(work_dir)) + json_file = osp.join(work_dir, f'fps_{timestamp}.json') + + repeat_times = args.repeat_times + # set cudnn_benchmark + torch.backends.cudnn.benchmark = False + cfg.model.pretrained = None + + benchmark_dict = dict(config=args.config, unit='img / s') + overall_fps_list = [] + cfg.test_dataloader.batch_size = 1 + for time_index in range(repeat_times): + print(f'Run {time_index + 1}:') + # build the dataloader + data_loader = Runner.build_dataloader(cfg.test_dataloader) + + # build the model and load checkpoint + cfg.model.train_cfg = None + model = MODELS.build(cfg.model) + + if 'checkpoint' in args and osp.exists(args.checkpoint): + load_checkpoint(model, args.checkpoint, map_location='cpu') + + if torch.cuda.is_available(): + model = model.cuda() + + model = revert_sync_batchnorm(model) + + model.eval() + + # the first several iterations may be very slow so skip them + num_warmup = 5 + pure_inf_time = 0 + total_iters = 200 + + # benchmark with 200 batches and take the average + for i, data in enumerate(data_loader): + data = model.data_preprocessor(data, True) + inputs = data['inputs'] + data_samples = data['data_samples'] + if torch.cuda.is_available(): + torch.cuda.synchronize() + start_time = time.perf_counter() + + with torch.no_grad(): + model(inputs, data_samples, mode='predict') + + if torch.cuda.is_available(): + torch.cuda.synchronize() + elapsed = time.perf_counter() - start_time + + if i >= num_warmup: + pure_inf_time += elapsed + if (i + 1) % args.log_interval == 0: + fps = (i + 1 - num_warmup) / pure_inf_time + print(f'Done image [{i + 1:<3}/ {total_iters}], ' + f'fps: {fps:.2f} img / s') + + if (i + 1) == total_iters: + fps = (i + 1 - num_warmup) / pure_inf_time + print(f'Overall fps: {fps:.2f} img / s\n') + benchmark_dict[f'overall_fps_{time_index + 1}'] = round(fps, 2) + overall_fps_list.append(fps) + break + benchmark_dict['average_fps'] = round(np.mean(overall_fps_list), 2) + benchmark_dict['fps_variance'] = round(np.var(overall_fps_list), 4) + print(f'Average fps of {repeat_times} evaluations: ' + 
f'{benchmark_dict["average_fps"]}') + print(f'The variance of {repeat_times} evaluations: ' + f'{benchmark_dict["fps_variance"]}') + dump(benchmark_dict, json_file, indent=4) + + +if __name__ == '__main__': + main() diff --git a/tools/analysis_tools/confusion_matrix.py b/tools/analysis_tools/confusion_matrix.py new file mode 100644 index 0000000000000000000000000000000000000000..39756cdfdd2341e7e02f9de24077da880b6021c3 --- /dev/null +++ b/tools/analysis_tools/confusion_matrix.py @@ -0,0 +1,197 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os + +import matplotlib.pyplot as plt +import numpy as np +from matplotlib.ticker import MultipleLocator +from mmengine.config import Config, DictAction +from mmengine.registry import init_default_scope +from mmengine.utils import mkdir_or_exist, progressbar +from PIL import Image + +from mmseg.registry import DATASETS + +init_default_scope('mmseg') + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Generate confusion matrix from segmentation results') + parser.add_argument('config', help='test config file path') + parser.add_argument( + 'prediction_path', help='prediction path where test folder result') + parser.add_argument( + 'save_dir', help='directory where confusion matrix will be saved') + parser.add_argument( + '--show', action='store_true', help='show confusion matrix') + parser.add_argument( + '--color-theme', + default='winter', + help='theme of the matrix color map') + parser.add_argument( + '--title', + default='Normalized Confusion Matrix', + help='title of the matrix color map') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def calculate_confusion_matrix(dataset, results): + """Calculate the confusion matrix. + + Args: + dataset (Dataset): Test or val dataset. + results (list[ndarray]): A list of segmentation results in each image. + """ + n = len(dataset.METAINFO['classes']) + confusion_matrix = np.zeros(shape=[n, n]) + assert len(dataset) == len(results) + ignore_index = dataset.ignore_index + reduce_zero_label = dataset.reduce_zero_label + prog_bar = progressbar.ProgressBar(len(results)) + for idx, per_img_res in enumerate(results): + res_segm = per_img_res + gt_segm = dataset[idx]['data_samples'] \ + .gt_sem_seg.data.squeeze().numpy().astype(np.uint8) + gt_segm, res_segm = gt_segm.flatten(), res_segm.flatten() + if reduce_zero_label: + gt_segm = gt_segm - 1 + to_ignore = gt_segm == ignore_index + + gt_segm, res_segm = gt_segm[~to_ignore], res_segm[~to_ignore] + inds = n * gt_segm + res_segm + mat = np.bincount(inds, minlength=n**2).reshape(n, n) + confusion_matrix += mat + prog_bar.update() + return confusion_matrix + + +def plot_confusion_matrix(confusion_matrix, + labels, + save_dir=None, + show=True, + title='Normalized Confusion Matrix', + color_theme='OrRd'): + """Draw confusion matrix with matplotlib. + + Args: + confusion_matrix (ndarray): The confusion matrix. + labels (list[str]): List of class names. + save_dir (str|optional): If set, save the confusion matrix plot to the + given path. Default: None. 
+ show (bool): Whether to show the plot. Default: True. + title (str): Title of the plot. Default: `Normalized Confusion Matrix`. + color_theme (str): Theme of the matrix color map. Default: `winter`. + """ + # normalize the confusion matrix + per_label_sums = confusion_matrix.sum(axis=1)[:, np.newaxis] + confusion_matrix = \ + confusion_matrix.astype(np.float32) / per_label_sums * 100 + + num_classes = len(labels) + fig, ax = plt.subplots( + figsize=(2 * num_classes, 2 * num_classes * 0.8), dpi=300) + cmap = plt.get_cmap(color_theme) + im = ax.imshow(confusion_matrix, cmap=cmap) + colorbar = plt.colorbar(mappable=im, ax=ax) + colorbar.ax.tick_params(labelsize=20) # 设置 colorbar 标签的字体大小 + + title_font = {'weight': 'bold', 'size': 20} + ax.set_title(title, fontdict=title_font) + label_font = {'size': 40} + plt.ylabel('Ground Truth Label', fontdict=label_font) + plt.xlabel('Prediction Label', fontdict=label_font) + + # draw locator + xmajor_locator = MultipleLocator(1) + xminor_locator = MultipleLocator(0.5) + ax.xaxis.set_major_locator(xmajor_locator) + ax.xaxis.set_minor_locator(xminor_locator) + ymajor_locator = MultipleLocator(1) + yminor_locator = MultipleLocator(0.5) + ax.yaxis.set_major_locator(ymajor_locator) + ax.yaxis.set_minor_locator(yminor_locator) + + # draw grid + ax.grid(True, which='minor', linestyle='-') + + # draw label + ax.set_xticks(np.arange(num_classes)) + ax.set_yticks(np.arange(num_classes)) + ax.set_xticklabels(labels, fontsize=20) + ax.set_yticklabels(labels, fontsize=20) + + ax.tick_params( + axis='x', bottom=False, top=True, labelbottom=False, labeltop=True) + plt.setp( + ax.get_xticklabels(), rotation=45, ha='left', rotation_mode='anchor') + + # draw confusion matrix value + for i in range(num_classes): + for j in range(num_classes): + ax.text( + j, + i, + '{}%'.format( + round(confusion_matrix[i, j], 2 + ) if not np.isnan(confusion_matrix[i, j]) else -1), + ha='center', + va='center', + color='k', + size=20) + + ax.set_ylim(len(confusion_matrix) - 0.5, -0.5) # matplotlib>3.1.1 + + fig.tight_layout() + if save_dir is not None: + mkdir_or_exist(save_dir) + plt.savefig( + os.path.join(save_dir, 'confusion_matrix.png'), format='png') + if show: + plt.show() + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + results = [] + for img in sorted(os.listdir(args.prediction_path)): + img = os.path.join(args.prediction_path, img) + image = Image.open(img) + image = np.copy(image) + results.append(image) + + assert isinstance(results, list) + if isinstance(results[0], np.ndarray): + pass + else: + raise TypeError('invalid type of prediction results') + + dataset = DATASETS.build(cfg.test_dataloader.dataset) + confusion_matrix = calculate_confusion_matrix(dataset, results) + plot_confusion_matrix( + confusion_matrix, + dataset.METAINFO['classes'], + save_dir=args.save_dir, + show=args.show, + title=args.title, + color_theme=args.color_theme) + + +if __name__ == '__main__': + main() diff --git a/tools/analysis_tools/get_flops.py b/tools/analysis_tools/get_flops.py new file mode 100644 index 0000000000000000000000000000000000000000..c1563587180908e15ab96161d0fcde6366d5a62c --- /dev/null +++ b/tools/analysis_tools/get_flops.py @@ -0,0 +1,126 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
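# --- Illustrative sketch (not part of the diff above) ---------------------
# confusion_matrix.py above accumulates the matrix with a flattened-index
# trick: for n classes, each (gt, pred) pair maps to index n * gt + pred, so
# a single np.bincount call counts every cell at once. Tiny worked example
# with n = 3 classes (the pixel values are made up):
import numpy as np

n = 3
gt = np.array([0, 0, 1, 2, 2, 2])
pred = np.array([0, 1, 1, 2, 2, 0])
inds = n * gt + pred
confusion = np.bincount(inds, minlength=n ** 2).reshape(n, n)
# rows = ground truth, columns = prediction
print(confusion)
# [[1 1 0]
#  [0 1 0]
#  [1 0 2]]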
+import argparse +import tempfile +from pathlib import Path + +import torch +from mmengine import Config, DictAction +from mmengine.logging import MMLogger +from mmengine.model import revert_sync_batchnorm +from mmengine.registry import init_default_scope + +from mmseg.models import BaseSegmentor +from mmseg.registry import MODELS +from mmseg.structures import SegDataSample +from vegseg import models +try: + from mmengine.analysis import get_model_complexity_info + from mmengine.analysis.print_helper import _format_size +except ImportError: + raise ImportError('Please upgrade mmengine >= 0.6.0 to use this script.') + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Get the FLOPs of a segmentor') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=[2048, 1024], + help='input image size') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def inference(args: argparse.Namespace, logger: MMLogger) -> dict: + config_name = Path(args.config) + + if not config_name.exists(): + logger.error(f'Config file {config_name} does not exist') + + cfg: Config = Config.fromfile(config_name) + cfg.work_dir = tempfile.TemporaryDirectory().name + cfg.log_level = 'WARN' + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + init_default_scope(cfg.get('scope', 'mmseg')) + + if len(args.shape) == 1: + input_shape = (3, args.shape[0], args.shape[0]) + elif len(args.shape) == 2: + input_shape = (3, ) + tuple(args.shape) + else: + raise ValueError('invalid input shape') + result = {} + + model: BaseSegmentor = MODELS.build(cfg.model) + if hasattr(model, 'auxiliary_head'): + model.auxiliary_head = None + if hasattr(model, 'teach_backbone'): + model.teach_backbone = None + if torch.cuda.is_available(): + model.cuda() + model = revert_sync_batchnorm(model) + result['ori_shape'] = input_shape[-2:] + result['pad_shape'] = input_shape[-2:] + data_batch = { + 'inputs': [torch.rand(input_shape)], + 'data_samples': [SegDataSample(metainfo=result)] + } + data = model.data_preprocessor(data_batch) + model.eval() + if cfg.model.decode_head.type in ['MaskFormerHead', 'Mask2FormerHead']: + # TODO: Support MaskFormer and Mask2Former + raise NotImplementedError('MaskFormer and Mask2Former are not ' + 'supported yet.') + outputs = get_model_complexity_info( + model, + input_shape=None, + inputs=data['inputs'], + show_table=False, + show_arch=False) + result['flops'] = _format_size(outputs['flops']) + result['params'] = _format_size(outputs['params']) + result['compute_type'] = 'direct: randomly generate a picture' + return result + + +def main(): + + args = parse_args() + logger = MMLogger.get_instance(name='MMLogger') + + result = inference(args, logger) + split_line = '=' * 30 + ori_shape = result['ori_shape'] + pad_shape = result['pad_shape'] + flops = result['flops'] + params = result['params'] + compute_type = result['compute_type'] + + if pad_shape != ori_shape: + print(f'{split_line}\nUse size divisor set input shape ' + f'from {ori_shape} to {pad_shape}') + 
print(f'{split_line}\nCompute type: {compute_type}\n' + f'Input shape: {pad_shape}\nFlops: {flops}\n' + f'Params: {params}\n{split_line}') + print('!!!Please be cautious if you use the results in papers. ' + 'You may need to check if all ops are supported and verify ' + 'that the flops computation is correct.') + + +if __name__ == '__main__': + main() diff --git a/tools/analysis_tools/visualization_cam.py b/tools/analysis_tools/visualization_cam.py new file mode 100644 index 0000000000000000000000000000000000000000..00cdb3e04ab1f9000844ace781bc138f230d4630 --- /dev/null +++ b/tools/analysis_tools/visualization_cam.py @@ -0,0 +1,127 @@ +# Copyright (c) OpenMMLab. All rights reserved. +"""Use the pytorch-grad-cam tool to visualize Class Activation Maps (CAM). + +requirement: pip install grad-cam +""" + +from argparse import ArgumentParser + +import numpy as np +import torch +import torch.nn.functional as F +from mmengine import Config +from mmengine.model import revert_sync_batchnorm +from PIL import Image +from pytorch_grad_cam import GradCAM +from pytorch_grad_cam.utils.image import preprocess_image, show_cam_on_image + +from mmseg.apis import inference_model, init_model, show_result_pyplot +from mmseg.utils import register_all_modules + + +class SemanticSegmentationTarget: + """wrap the model. + + requirement: pip install grad-cam + + Args: + category (int): Visualization class. + mask (ndarray): Mask of class. + size (tuple): Image size. + """ + + def __init__(self, category, mask, size): + self.category = category + self.mask = torch.from_numpy(mask) + self.size = size + if torch.cuda.is_available(): + self.mask = self.mask.cuda() + + def __call__(self, model_output): + model_output = torch.unsqueeze(model_output, dim=0) + model_output = F.interpolate( + model_output, size=self.size, mode='bilinear') + model_output = torch.squeeze(model_output, dim=0) + + return (model_output[self.category, :, :] * self.mask).sum() + + +def main(): + parser = ArgumentParser() + parser.add_argument('img', help='Image file') + parser.add_argument('config', help='Config file') + parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument( + '--out-file', + default='prediction.png', + help='Path to output prediction file') + parser.add_argument( + '--cam-file', default='vis_cam.png', help='Path to output cam file') + parser.add_argument( + '--target-layers', + default='backbone.layer4[2]', + help='Target layers to visualize CAM') + parser.add_argument( + '--category-index', default='7', help='Category to visualize CAM') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + args = parser.parse_args() + + # build the model from a config file and a checkpoint file + register_all_modules() + model = init_model(args.config, args.checkpoint, device=args.device) + if args.device == 'cpu': + model = revert_sync_batchnorm(model) + + # test a single image + result = inference_model(model, args.img) + + # show the results + show_result_pyplot( + model, + args.img, + result, + draw_gt=False, + show=False if args.out_file is not None else True, + out_file=args.out_file) + + # result data conversion + prediction_data = result.pred_sem_seg.data + pre_np_data = prediction_data.cpu().numpy().squeeze(0) + + target_layers = args.target_layers + target_layers = [eval(f'model.{target_layers}')] + + category = int(args.category_index) + mask_float = np.float32(pre_np_data == category) + + # data processing + image = np.array(Image.open(args.img).convert('RGB')) + height, 
width = image.shape[0], image.shape[1] + rgb_img = np.float32(image) / 255 + config = Config.fromfile(args.config) + image_mean = config.data_preprocessor['mean'] + image_std = config.data_preprocessor['std'] + input_tensor = preprocess_image( + rgb_img, + mean=[x / 255 for x in image_mean], + std=[x / 255 for x in image_std]) + + # Grad CAM(Class Activation Maps) + # Can also be LayerCAM, XGradCAM, GradCAMPlusPlus, EigenCAM, EigenGradCAM + targets = [ + SemanticSegmentationTarget(category, mask_float, (height, width)) + ] + with GradCAM( + model=model, + target_layers=target_layers, + use_cuda=torch.cuda.is_available()) as cam: + grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0, :] + cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True) + + # save cam file + Image.fromarray(cam_image).save(args.cam_file) + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/chase_db1.py b/tools/dataset_converters/chase_db1.py new file mode 100644 index 0000000000000000000000000000000000000000..f4fefbd77435c5745d290269cd00f67fda604455 --- /dev/null +++ b/tools/dataset_converters/chase_db1.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv +from mmengine.utils import mkdir_or_exist + +CHASE_DB1_LEN = 28 * 3 +TRAINING_LEN = 60 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert CHASE_DB1 dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='path of CHASEDB1.zip') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + dataset_path = args.dataset_path + if args.out_dir is None: + out_dir = osp.join('data', 'CHASE_DB1') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(out_dir) + mkdir_or_exist(osp.join(out_dir, 'images')) + mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'annotations')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + print('Extracting CHASEDB1.zip...') + zip_file = zipfile.ZipFile(dataset_path) + zip_file.extractall(tmp_dir) + + print('Generating training dataset...') + + assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \ + f'len(os.listdir(tmp_dir)) != {CHASE_DB1_LEN}' + + for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, img_name)) + if osp.splitext(img_name)[1] == '.jpg': + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(img_name)[0] + '.png')) + else: + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. 
The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(img_name)[0] + '.png')) + + for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, img_name)) + if osp.splitext(img_name)[1] == '.jpg': + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(img_name)[0] + '.png')) + else: + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(img_name)[0] + '.png')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/cityscapes.py b/tools/dataset_converters/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..0d6a80135d906db7330a736ccbcc908e0a6309c6 --- /dev/null +++ b/tools/dataset_converters/cityscapes.py @@ -0,0 +1,56 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp + +from cityscapesscripts.preparation.json2labelImg import json2labelImg +from mmengine.utils import (mkdir_or_exist, scandir, track_parallel_progress, + track_progress) + + +def convert_json_to_label(json_file): + label_file = json_file.replace('_polygons.json', '_labelTrainIds.png') + json2labelImg(json_file, label_file, 'trainIds') + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert Cityscapes annotations to TrainIds') + parser.add_argument('cityscapes_path', help='cityscapes data path') + parser.add_argument('--gt-dir', default='gtFine', type=str) + parser.add_argument('-o', '--out-dir', help='output path') + parser.add_argument( + '--nproc', default=1, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + cityscapes_path = args.cityscapes_path + out_dir = args.out_dir if args.out_dir else cityscapes_path + mkdir_or_exist(out_dir) + + gt_dir = osp.join(cityscapes_path, args.gt_dir) + + poly_files = [] + for poly in scandir(gt_dir, '_polygons.json', recursive=True): + poly_file = osp.join(gt_dir, poly) + poly_files.append(poly_file) + if args.nproc > 1: + track_parallel_progress(convert_json_to_label, poly_files, args.nproc) + else: + track_progress(convert_json_to_label, poly_files) + + split_names = ['train', 'val', 'test'] + + for split in split_names: + filenames = [] + for poly in scandir( + osp.join(gt_dir, split), '_polygons.json', recursive=True): + filenames.append(poly.replace('_gtFine_polygons.json', '')) + with open(osp.join(out_dir, f'{split}.txt'), 'w') as f: + f.writelines(f + '\n' for f in filenames) + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/coco_stuff10k.py b/tools/dataset_converters/coco_stuff10k.py new file mode 100644 index 0000000000000000000000000000000000000000..920127ee10fc09b76f8e2344ecdf3b7800d51802 --- /dev/null +++ b/tools/dataset_converters/coco_stuff10k.py @@ -0,0 +1,308 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
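+"""Convert COCO-Stuff 10k annotations to mmsegmentation-style trainId PNGs.
+
+The .mat masks under ``annotations`` are remapped with ``clsID_to_trID`` and
+saved as ``*_labelTrainIds.png``. A sketch of the invocation with a
+placeholder dataset path (the real arguments are defined in parse_args below):
+
+    python tools/dataset_converters/coco_stuff10k.py /path/to/coco_stuff10k \
+        -o data/coco_stuff10k --nproc 16
+"""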
+import argparse +import os.path as osp +import shutil +from functools import partial + +import numpy as np +from mmengine.utils import (mkdir_or_exist, track_parallel_progress, + track_progress) +from PIL import Image +from scipy.io import loadmat + +COCO_LEN = 10000 + +clsID_to_trID = { + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + 10: 10, + 11: 11, + 13: 12, + 14: 13, + 15: 14, + 16: 15, + 17: 16, + 18: 17, + 19: 18, + 20: 19, + 21: 20, + 22: 21, + 23: 22, + 24: 23, + 25: 24, + 27: 25, + 28: 26, + 31: 27, + 32: 28, + 33: 29, + 34: 30, + 35: 31, + 36: 32, + 37: 33, + 38: 34, + 39: 35, + 40: 36, + 41: 37, + 42: 38, + 43: 39, + 44: 40, + 46: 41, + 47: 42, + 48: 43, + 49: 44, + 50: 45, + 51: 46, + 52: 47, + 53: 48, + 54: 49, + 55: 50, + 56: 51, + 57: 52, + 58: 53, + 59: 54, + 60: 55, + 61: 56, + 62: 57, + 63: 58, + 64: 59, + 65: 60, + 67: 61, + 70: 62, + 72: 63, + 73: 64, + 74: 65, + 75: 66, + 76: 67, + 77: 68, + 78: 69, + 79: 70, + 80: 71, + 81: 72, + 82: 73, + 84: 74, + 85: 75, + 86: 76, + 87: 77, + 88: 78, + 89: 79, + 90: 80, + 92: 81, + 93: 82, + 94: 83, + 95: 84, + 96: 85, + 97: 86, + 98: 87, + 99: 88, + 100: 89, + 101: 90, + 102: 91, + 103: 92, + 104: 93, + 105: 94, + 106: 95, + 107: 96, + 108: 97, + 109: 98, + 110: 99, + 111: 100, + 112: 101, + 113: 102, + 114: 103, + 115: 104, + 116: 105, + 117: 106, + 118: 107, + 119: 108, + 120: 109, + 121: 110, + 122: 111, + 123: 112, + 124: 113, + 125: 114, + 126: 115, + 127: 116, + 128: 117, + 129: 118, + 130: 119, + 131: 120, + 132: 121, + 133: 122, + 134: 123, + 135: 124, + 136: 125, + 137: 126, + 138: 127, + 139: 128, + 140: 129, + 141: 130, + 142: 131, + 143: 132, + 144: 133, + 145: 134, + 146: 135, + 147: 136, + 148: 137, + 149: 138, + 150: 139, + 151: 140, + 152: 141, + 153: 142, + 154: 143, + 155: 144, + 156: 145, + 157: 146, + 158: 147, + 159: 148, + 160: 149, + 161: 150, + 162: 151, + 163: 152, + 164: 153, + 165: 154, + 166: 155, + 167: 156, + 168: 157, + 169: 158, + 170: 159, + 171: 160, + 172: 161, + 173: 162, + 174: 163, + 175: 164, + 176: 165, + 177: 166, + 178: 167, + 179: 168, + 180: 169, + 181: 170, + 182: 171 +} + + +def convert_to_trainID(tuple_path, in_img_dir, in_ann_dir, out_img_dir, + out_mask_dir, is_train): + imgpath, maskpath = tuple_path + shutil.copyfile( + osp.join(in_img_dir, imgpath), + osp.join(out_img_dir, 'train2014', imgpath) if is_train else osp.join( + out_img_dir, 'test2014', imgpath)) + annotate = loadmat(osp.join(in_ann_dir, maskpath)) + mask = annotate['S'].astype(np.uint8) + mask_copy = mask.copy() + for clsID, trID in clsID_to_trID.items(): + mask_copy[mask == clsID] = trID + seg_filename = osp.join(out_mask_dir, 'train2014', + maskpath.split('.')[0] + + '_labelTrainIds.png') if is_train else osp.join( + out_mask_dir, 'test2014', + maskpath.split('.')[0] + '_labelTrainIds.png') + Image.fromarray(mask_copy).save(seg_filename, 'PNG') + + +def generate_coco_list(folder): + train_list = osp.join(folder, 'imageLists', 'train.txt') + test_list = osp.join(folder, 'imageLists', 'test.txt') + train_paths = [] + test_paths = [] + + with open(train_list) as f: + for filename in f: + basename = filename.strip() + imgpath = basename + '.jpg' + maskpath = basename + '.mat' + train_paths.append((imgpath, maskpath)) + + with open(test_list) as f: + for filename in f: + basename = filename.strip() + imgpath = basename + '.jpg' + maskpath = basename + '.mat' + test_paths.append((imgpath, maskpath)) + + return train_paths, test_paths + + +def parse_args(): + parser = 
argparse.ArgumentParser( + description=\ + 'Convert COCO Stuff 10k annotations to mmsegmentation format') # noqa + parser.add_argument('coco_path', help='coco stuff path') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--nproc', default=16, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + coco_path = args.coco_path + nproc = args.nproc + + out_dir = args.out_dir or coco_path + out_img_dir = osp.join(out_dir, 'images') + out_mask_dir = osp.join(out_dir, 'annotations') + + mkdir_or_exist(osp.join(out_img_dir, 'train2014')) + mkdir_or_exist(osp.join(out_img_dir, 'test2014')) + mkdir_or_exist(osp.join(out_mask_dir, 'train2014')) + mkdir_or_exist(osp.join(out_mask_dir, 'test2014')) + + train_list, test_list = generate_coco_list(coco_path) + assert (len(train_list) + + len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format( + len(train_list), len(test_list)) + + if args.nproc > 1: + track_parallel_progress( + partial( + convert_to_trainID, + in_img_dir=osp.join(coco_path, 'images'), + in_ann_dir=osp.join(coco_path, 'annotations'), + out_img_dir=out_img_dir, + out_mask_dir=out_mask_dir, + is_train=True), + train_list, + nproc=nproc) + track_parallel_progress( + partial( + convert_to_trainID, + in_img_dir=osp.join(coco_path, 'images'), + in_ann_dir=osp.join(coco_path, 'annotations'), + out_img_dir=out_img_dir, + out_mask_dir=out_mask_dir, + is_train=False), + test_list, + nproc=nproc) + else: + track_progress( + partial( + convert_to_trainID, + in_img_dir=osp.join(coco_path, 'images'), + in_ann_dir=osp.join(coco_path, 'annotations'), + out_img_dir=out_img_dir, + out_mask_dir=out_mask_dir, + is_train=True), train_list) + track_progress( + partial( + convert_to_trainID, + in_img_dir=osp.join(coco_path, 'images'), + in_ann_dir=osp.join(coco_path, 'annotations'), + out_img_dir=out_img_dir, + out_mask_dir=out_mask_dir, + is_train=False), test_list) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/coco_stuff164k.py b/tools/dataset_converters/coco_stuff164k.py new file mode 100644 index 0000000000000000000000000000000000000000..a13114ab1e0c37675369b2e9ba065cbfb2dca1e7 --- /dev/null +++ b/tools/dataset_converters/coco_stuff164k.py @@ -0,0 +1,265 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
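+"""Convert COCO-Stuff 164k annotations to mmsegmentation-style trainId PNGs.
+
+The PNG masks under ``annotations/train2017`` and ``annotations/val2017`` are
+remapped with ``clsID_to_trID`` and saved as ``*_labelTrainIds.png``. A sketch
+of the invocation with a placeholder dataset path (see parse_args below for
+the actual arguments):
+
+    python tools/dataset_converters/coco_stuff164k.py /path/to/coco_stuff164k \
+        -o data/coco_stuff164k --nproc 16
+"""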
+import argparse +import os.path as osp +import shutil +from functools import partial +from glob import glob + +import numpy as np +from mmengine.utils import (mkdir_or_exist, track_parallel_progress, + track_progress) +from PIL import Image + +COCO_LEN = 123287 + +clsID_to_trID = { + 0: 0, + 1: 1, + 2: 2, + 3: 3, + 4: 4, + 5: 5, + 6: 6, + 7: 7, + 8: 8, + 9: 9, + 10: 10, + 12: 11, + 13: 12, + 14: 13, + 15: 14, + 16: 15, + 17: 16, + 18: 17, + 19: 18, + 20: 19, + 21: 20, + 22: 21, + 23: 22, + 24: 23, + 26: 24, + 27: 25, + 30: 26, + 31: 27, + 32: 28, + 33: 29, + 34: 30, + 35: 31, + 36: 32, + 37: 33, + 38: 34, + 39: 35, + 40: 36, + 41: 37, + 42: 38, + 43: 39, + 45: 40, + 46: 41, + 47: 42, + 48: 43, + 49: 44, + 50: 45, + 51: 46, + 52: 47, + 53: 48, + 54: 49, + 55: 50, + 56: 51, + 57: 52, + 58: 53, + 59: 54, + 60: 55, + 61: 56, + 62: 57, + 63: 58, + 64: 59, + 66: 60, + 69: 61, + 71: 62, + 72: 63, + 73: 64, + 74: 65, + 75: 66, + 76: 67, + 77: 68, + 78: 69, + 79: 70, + 80: 71, + 81: 72, + 83: 73, + 84: 74, + 85: 75, + 86: 76, + 87: 77, + 88: 78, + 89: 79, + 91: 80, + 92: 81, + 93: 82, + 94: 83, + 95: 84, + 96: 85, + 97: 86, + 98: 87, + 99: 88, + 100: 89, + 101: 90, + 102: 91, + 103: 92, + 104: 93, + 105: 94, + 106: 95, + 107: 96, + 108: 97, + 109: 98, + 110: 99, + 111: 100, + 112: 101, + 113: 102, + 114: 103, + 115: 104, + 116: 105, + 117: 106, + 118: 107, + 119: 108, + 120: 109, + 121: 110, + 122: 111, + 123: 112, + 124: 113, + 125: 114, + 126: 115, + 127: 116, + 128: 117, + 129: 118, + 130: 119, + 131: 120, + 132: 121, + 133: 122, + 134: 123, + 135: 124, + 136: 125, + 137: 126, + 138: 127, + 139: 128, + 140: 129, + 141: 130, + 142: 131, + 143: 132, + 144: 133, + 145: 134, + 146: 135, + 147: 136, + 148: 137, + 149: 138, + 150: 139, + 151: 140, + 152: 141, + 153: 142, + 154: 143, + 155: 144, + 156: 145, + 157: 146, + 158: 147, + 159: 148, + 160: 149, + 161: 150, + 162: 151, + 163: 152, + 164: 153, + 165: 154, + 166: 155, + 167: 156, + 168: 157, + 169: 158, + 170: 159, + 171: 160, + 172: 161, + 173: 162, + 174: 163, + 175: 164, + 176: 165, + 177: 166, + 178: 167, + 179: 168, + 180: 169, + 181: 170, + 255: 255 +} + + +def convert_to_trainID(maskpath, out_mask_dir, is_train): + mask = np.array(Image.open(maskpath)) + mask_copy = mask.copy() + for clsID, trID in clsID_to_trID.items(): + mask_copy[mask == clsID] = trID + seg_filename = osp.join( + out_mask_dir, 'train2017', + osp.basename(maskpath).split('.')[0] + + '_labelTrainIds.png') if is_train else osp.join( + out_mask_dir, 'val2017', + osp.basename(maskpath).split('.')[0] + '_labelTrainIds.png') + Image.fromarray(mask_copy).save(seg_filename, 'PNG') + + +def parse_args(): + parser = argparse.ArgumentParser( + description=\ + 'Convert COCO Stuff 164k annotations to mmsegmentation format') # noqa + parser.add_argument('coco_path', help='coco stuff path') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--nproc', default=16, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + coco_path = args.coco_path + nproc = args.nproc + + out_dir = args.out_dir or coco_path + out_img_dir = osp.join(out_dir, 'images') + out_mask_dir = osp.join(out_dir, 'annotations') + + mkdir_or_exist(osp.join(out_mask_dir, 'train2017')) + mkdir_or_exist(osp.join(out_mask_dir, 'val2017')) + + if out_dir != coco_path: + shutil.copytree(osp.join(coco_path, 'images'), out_img_dir) + + train_list = glob(osp.join(coco_path, 'annotations', 'train2017', '*.png')) + train_list = 
[file for file in train_list if '_labelTrainIds' not in file] + test_list = glob(osp.join(coco_path, 'annotations', 'val2017', '*.png')) + test_list = [file for file in test_list if '_labelTrainIds' not in file] + assert (len(train_list) + + len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format( + len(train_list), len(test_list)) + + if args.nproc > 1: + track_parallel_progress( + partial( + convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True), + train_list, + nproc=nproc) + track_parallel_progress( + partial( + convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False), + test_list, + nproc=nproc) + else: + track_progress( + partial( + convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True), + train_list) + track_progress( + partial( + convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False), + test_list) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/hrf.py b/tools/dataset_converters/hrf.py new file mode 100644 index 0000000000000000000000000000000000000000..3bfd80c9ee42e3b5cba4a12a6c8b32ddbb2f1f11 --- /dev/null +++ b/tools/dataset_converters/hrf.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv +from mmengine.utils import mkdir_or_exist + +HRF_LEN = 15 +TRAINING_LEN = 5 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert HRF dataset to mmsegmentation format') + parser.add_argument('healthy_path', help='the path of healthy.zip') + parser.add_argument( + 'healthy_manualsegm_path', help='the path of healthy_manualsegm.zip') + parser.add_argument('glaucoma_path', help='the path of glaucoma.zip') + parser.add_argument( + 'glaucoma_manualsegm_path', help='the path of glaucoma_manualsegm.zip') + parser.add_argument( + 'diabetic_retinopathy_path', + help='the path of diabetic_retinopathy.zip') + parser.add_argument( + 'diabetic_retinopathy_manualsegm_path', + help='the path of diabetic_retinopathy_manualsegm.zip') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + images_path = [ + args.healthy_path, args.glaucoma_path, args.diabetic_retinopathy_path + ] + annotations_path = [ + args.healthy_manualsegm_path, args.glaucoma_manualsegm_path, + args.diabetic_retinopathy_manualsegm_path + ] + if args.out_dir is None: + out_dir = osp.join('data', 'HRF') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(out_dir) + mkdir_or_exist(osp.join(out_dir, 'images')) + mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'annotations')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + print('Generating images...') + for now_path in images_path: + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + zip_file = zipfile.ZipFile(now_path) + zip_file.extractall(tmp_dir) + + assert len(os.listdir(tmp_dir)) == HRF_LEN, \ + f'len(os.listdir(tmp_dir)) != {HRF_LEN}' + + for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(filename)[0] + '.png')) + for filename in 
sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Generating annotations...') + for now_path in annotations_path: + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + zip_file = zipfile.ZipFile(now_path) + zip_file.extractall(tmp_dir) + + assert len(os.listdir(tmp_dir)) == HRF_LEN, \ + f'len(os.listdir(tmp_dir)) != {HRF_LEN}' + + for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a + # threshold to convert the nonstandard annotation imgs. The + # value divided by 128 is equivalent to '1 if value >= 128 + # else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(filename)[0] + '.png')) + for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(tmp_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/isaid.py b/tools/dataset_converters/isaid.py new file mode 100644 index 0000000000000000000000000000000000000000..1d5ccd9c776e9621c261e6d168bf6aa4f7b451f6 --- /dev/null +++ b/tools/dataset_converters/isaid.py @@ -0,0 +1,246 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import glob +import os +import os.path as osp +import shutil +import tempfile +import zipfile + +import mmcv +import numpy as np +from mmengine.utils import ProgressBar, mkdir_or_exist +from PIL import Image + +iSAID_palette = \ + { + 0: (0, 0, 0), + 1: (0, 0, 63), + 2: (0, 63, 63), + 3: (0, 63, 0), + 4: (0, 63, 127), + 5: (0, 63, 191), + 6: (0, 63, 255), + 7: (0, 127, 63), + 8: (0, 127, 127), + 9: (0, 0, 127), + 10: (0, 0, 191), + 11: (0, 0, 255), + 12: (0, 191, 127), + 13: (0, 127, 191), + 14: (0, 127, 255), + 15: (0, 100, 155) + } + +iSAID_invert_palette = {v: k for k, v in iSAID_palette.items()} + + +def iSAID_convert_from_color(arr_3d, palette=iSAID_invert_palette): + """RGB-color encoding to grayscale labels.""" + arr_2d = np.zeros((arr_3d.shape[0], arr_3d.shape[1]), dtype=np.uint8) + + for c, i in palette.items(): + m = np.all(arr_3d == np.array(c).reshape(1, 1, 3), axis=2) + arr_2d[m] = i + + return arr_2d + + +def slide_crop_image(src_path, out_dir, mode, patch_H, patch_W, overlap): + img = np.asarray(Image.open(src_path).convert('RGB')) + + img_H, img_W, _ = img.shape + + if img_H < patch_H and img_W > patch_W: + + img = mmcv.impad(img, shape=(patch_H, img_W), pad_val=0) + + img_H, img_W, _ = img.shape + + elif img_H > patch_H and img_W < patch_W: + + img = mmcv.impad(img, shape=(img_H, patch_W), pad_val=0) + + img_H, img_W, _ = img.shape + + elif img_H < patch_H and img_W < patch_W: + + img = mmcv.impad(img, shape=(patch_H, patch_W), pad_val=0) + + img_H, img_W, _ = img.shape + + for x in range(0, img_W, patch_W - overlap): + for y in range(0, img_H, patch_H - overlap): + x_str = x + x_end = x + patch_W + if x_end > img_W: + diff_x = x_end - img_W + x_str -= diff_x + x_end = img_W + y_str = y + y_end = y + patch_H + if y_end > img_H: + diff_y = y_end - img_H + y_str -= diff_y + y_end = img_H + + img_patch = img[y_str:y_end, x_str:x_end, :] + img_patch = 
Image.fromarray(img_patch.astype(np.uint8)) + image = osp.basename(src_path).split('.')[0] + '_' + str( + y_str) + '_' + str(y_end) + '_' + str(x_str) + '_' + str( + x_end) + '.png' + # print(image) + save_path_image = osp.join(out_dir, 'img_dir', mode, str(image)) + img_patch.save(save_path_image, format='BMP') + + +def slide_crop_label(src_path, out_dir, mode, patch_H, patch_W, overlap): + label = mmcv.imread(src_path, channel_order='rgb') + label = iSAID_convert_from_color(label) + img_H, img_W = label.shape + + if img_H < patch_H and img_W > patch_W: + + label = mmcv.impad(label, shape=(patch_H, img_W), pad_val=255) + + img_H = patch_H + + elif img_H > patch_H and img_W < patch_W: + + label = mmcv.impad(label, shape=(img_H, patch_W), pad_val=255) + + img_W = patch_W + + elif img_H < patch_H and img_W < patch_W: + + label = mmcv.impad(label, shape=(patch_H, patch_W), pad_val=255) + + img_H = patch_H + img_W = patch_W + + for x in range(0, img_W, patch_W - overlap): + for y in range(0, img_H, patch_H - overlap): + x_str = x + x_end = x + patch_W + if x_end > img_W: + diff_x = x_end - img_W + x_str -= diff_x + x_end = img_W + y_str = y + y_end = y + patch_H + if y_end > img_H: + diff_y = y_end - img_H + y_str -= diff_y + y_end = img_H + + lab_patch = label[y_str:y_end, x_str:x_end] + lab_patch = Image.fromarray(lab_patch.astype(np.uint8), mode='P') + + image = osp.basename(src_path).split('.')[0].split( + '_')[0] + '_' + str(y_str) + '_' + str(y_end) + '_' + str( + x_str) + '_' + str(x_end) + '_instance_color_RGB' + '.png' + lab_patch.save(osp.join(out_dir, 'ann_dir', mode, str(image))) + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert iSAID dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='iSAID folder path') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + + parser.add_argument( + '--patch_width', + default=896, + type=int, + help='Width of the cropped image patch') + parser.add_argument( + '--patch_height', + default=896, + type=int, + help='Height of the cropped image patch') + parser.add_argument( + '--overlap_area', default=384, type=int, help='Overlap area') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + dataset_path = args.dataset_path + # image patch width and height + patch_H, patch_W = args.patch_width, args.patch_height + + overlap = args.overlap_area # overlap area + + if args.out_dir is None: + out_dir = osp.join('data', 'iSAID') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train')) + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val')) + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'test')) + + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'test')) + + assert os.path.exists(os.path.join(dataset_path, 'train')), \ + f'train is not in {dataset_path}' + assert os.path.exists(os.path.join(dataset_path, 'val')), \ + f'val is not in {dataset_path}' + assert os.path.exists(os.path.join(dataset_path, 'test')), \ + f'test is not in {dataset_path}' + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + for dataset_mode in ['train', 'val', 'test']: + + # for dataset_mode in [ 'test']: + print(f'Extracting {dataset_mode}ing.zip...') + img_zipp_list = glob.glob( + os.path.join(dataset_path, dataset_mode, 'images', 
'*.zip')) + print('Find the data', img_zipp_list) + for img_zipp in img_zipp_list: + zip_file = zipfile.ZipFile(img_zipp) + zip_file.extractall(os.path.join(tmp_dir, dataset_mode, 'img')) + src_path_list = glob.glob( + os.path.join(tmp_dir, dataset_mode, 'img', 'images', '*.png')) + + src_prog_bar = ProgressBar(len(src_path_list)) + for i, img_path in enumerate(src_path_list): + if dataset_mode != 'test': + slide_crop_image(img_path, out_dir, dataset_mode, patch_H, + patch_W, overlap) + + else: + shutil.move(img_path, + os.path.join(out_dir, 'img_dir', dataset_mode)) + src_prog_bar.update() + + if dataset_mode != 'test': + label_zipp_list = glob.glob( + os.path.join(dataset_path, dataset_mode, 'Semantic_masks', + '*.zip')) + for label_zipp in label_zipp_list: + zip_file = zipfile.ZipFile(label_zipp) + zip_file.extractall( + os.path.join(tmp_dir, dataset_mode, 'lab')) + + lab_path_list = glob.glob( + os.path.join(tmp_dir, dataset_mode, 'lab', 'images', + '*.png')) + lab_prog_bar = ProgressBar(len(lab_path_list)) + for i, lab_path in enumerate(lab_path_list): + slide_crop_label(lab_path, out_dir, dataset_mode, patch_H, + patch_W, overlap) + lab_prog_bar.update() + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/levircd.py b/tools/dataset_converters/levircd.py new file mode 100644 index 0000000000000000000000000000000000000000..8717f3e856ba3f171b511f34d0217e1fda87ccb6 --- /dev/null +++ b/tools/dataset_converters/levircd.py @@ -0,0 +1,99 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import glob +import math +import os +import os.path as osp + +import mmcv +import numpy as np +from mmengine.utils import ProgressBar + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert levir-cd dataset to mmsegmentation format') + parser.add_argument('--dataset_path', help='potsdam folder path') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--clip_size', + type=int, + help='clipped size of image after preparation', + default=256) + parser.add_argument( + '--stride_size', + type=int, + help='stride of clipping original images', + default=256) + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + input_folder = args.dataset_path + png_files = glob.glob( + os.path.join(input_folder, '**/*.png'), recursive=True) + output_folder = args.out_dir + prog_bar = ProgressBar(len(png_files)) + for png_file in png_files: + new_path = os.path.join( + output_folder, + os.path.relpath(os.path.dirname(png_file), input_folder)) + os.makedirs(os.path.dirname(new_path), exist_ok=True) + label = False + if 'label' in png_file: + label = True + clip_big_image(png_file, new_path, args, label) + prog_bar.update() + + +def clip_big_image(image_path, clip_save_dir, args, to_label=False): + image = mmcv.imread(image_path) + + h, w, c = image.shape + clip_size = args.clip_size + stride_size = args.stride_size + + num_rows = math.ceil((h - clip_size) / stride_size) if math.ceil( + (h - clip_size) / + stride_size) * stride_size + clip_size >= h else math.ceil( + (h - clip_size) / stride_size) + 1 + num_cols = math.ceil((w - clip_size) / stride_size) if math.ceil( + (w - clip_size) / + stride_size) * stride_size + clip_size >= w else math.ceil( + (w - clip_size) / stride_size) + 1 + + x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1)) + xmin = x * clip_size + ymin = y * clip_size + + xmin = xmin.ravel() + ymin = 
ymin.ravel() + xmin_offset = np.where(xmin + clip_size > w, w - xmin - clip_size, + np.zeros_like(xmin)) + ymin_offset = np.where(ymin + clip_size > h, h - ymin - clip_size, + np.zeros_like(ymin)) + boxes = np.stack([ + xmin + xmin_offset, ymin + ymin_offset, + np.minimum(xmin + clip_size, w), + np.minimum(ymin + clip_size, h) + ], + axis=1) + + if to_label: + image[image == 255] = 1 + image = image[:, :, 0] + for box in boxes: + start_x, start_y, end_x, end_y = box + clipped_image = image[start_y:end_y, start_x:end_x] \ + if to_label else image[start_y:end_y, start_x:end_x, :] + idx = osp.basename(image_path).split('.')[0] + mmcv.imwrite( + clipped_image.astype(np.uint8), + osp.join(clip_save_dir, + f'{idx}_{start_x}_{start_y}_{end_x}_{end_y}.png')) + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/loveda.py b/tools/dataset_converters/loveda.py new file mode 100644 index 0000000000000000000000000000000000000000..5b0ef4bb8bbd07f60dfc0397e9659f0200b96f5d --- /dev/null +++ b/tools/dataset_converters/loveda.py @@ -0,0 +1,73 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import os.path as osp +import shutil +import tempfile +import zipfile + +from mmengine.utils import mkdir_or_exist + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert LoveDA dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='LoveDA folder path') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + dataset_path = args.dataset_path + if args.out_dir is None: + out_dir = osp.join('data', 'loveDA') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(out_dir) + mkdir_or_exist(osp.join(out_dir, 'img_dir')) + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train')) + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val')) + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'test')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val')) + + assert 'Train.zip' in os.listdir(dataset_path), \ + f'Train.zip is not in {dataset_path}' + assert 'Val.zip' in os.listdir(dataset_path), \ + f'Val.zip is not in {dataset_path}' + assert 'Test.zip' in os.listdir(dataset_path), \ + f'Test.zip is not in {dataset_path}' + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + for dataset in ['Train', 'Val', 'Test']: + zip_file = zipfile.ZipFile( + os.path.join(dataset_path, dataset + '.zip')) + zip_file.extractall(tmp_dir) + data_type = dataset.lower() + for location in ['Rural', 'Urban']: + for image_type in ['images_png', 'masks_png']: + if image_type == 'images_png': + dst = osp.join(out_dir, 'img_dir', data_type) + else: + dst = osp.join(out_dir, 'ann_dir', data_type) + if dataset == 'Test' and image_type == 'masks_png': + continue + else: + src_dir = osp.join(tmp_dir, dataset, location, + image_type) + src_lst = os.listdir(src_dir) + for file in src_lst: + shutil.move(osp.join(src_dir, file), dst) + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/nyu.py b/tools/dataset_converters/nyu.py new file mode 100644 index 0000000000000000000000000000000000000000..49e09e7af6844b709e681f6d9f4df14ed547a00c --- /dev/null +++ 
b/tools/dataset_converters/nyu.py @@ -0,0 +1,89 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +import shutil +import tempfile +import zipfile + +from mmengine.utils import mkdir_or_exist + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert NYU Depth dataset to mmsegmentation format') + parser.add_argument('raw_data', help='the path of raw data') + parser.add_argument( + '-o', '--out_dir', help='output path', default='./data/nyu') + args = parser.parse_args() + return args + + +def reorganize(raw_data_dir: str, out_dir: str): + """Reorganize NYU Depth dataset files into the required directory + structure. + + Args: + raw_data_dir (str): Path to the raw data directory. + out_dir (str): Output directory for the organized dataset. + """ + + def move_data(data_list, dst_prefix, fname_func): + """Move data files from source to destination directory. + + Args: + data_list (list): List of data file paths. + dst_prefix (str): Prefix to be added to destination paths. + fname_func (callable): Function to process file names + """ + for data_item in data_list: + data_item = data_item.strip().strip('/') + new_item = fname_func(data_item) + shutil.move( + osp.join(raw_data_dir, data_item), + osp.join(out_dir, dst_prefix, new_item)) + + def process_phase(phase): + """Process a dataset phase (e.g., 'train' or 'test').""" + with open(osp.join(raw_data_dir, f'nyu_{phase}.txt')) as f: + data = filter(lambda x: len(x.strip()) > 0, f.readlines()) + data = map(lambda x: x.split()[:2], data) + images, annos = zip(*data) + + move_data(images, f'images/{phase}', + lambda x: x.replace('/rgb', '')) + move_data(annos, f'annotations/{phase}', + lambda x: x.replace('/sync_depth', '')) + + process_phase('train') + process_phase('test') + + +def main(): + args = parse_args() + + print('Making directories...') + mkdir_or_exist(args.out_dir) + for subdir in [ + 'images/train', 'images/test', 'annotations/train', + 'annotations/test' + ]: + mkdir_or_exist(osp.join(args.out_dir, subdir)) + + print('Generating images and annotations...') + + if args.raw_data.endswith('.zip'): + with tempfile.TemporaryDirectory() as tmp_dir: + zip_file = zipfile.ZipFile(args.raw_data) + zip_file.extractall(tmp_dir) + reorganize(osp.join(tmp_dir, 'nyu'), args.out_dir) + else: + assert osp.isdir( + args.raw_data + ), 'the argument --raw-data should be either a zip file or directory.' + reorganize(args.raw_data, args.out_dir) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/pascal_context.py b/tools/dataset_converters/pascal_context.py new file mode 100644 index 0000000000000000000000000000000000000000..a92d1dc6411137b92fe67fbde0fc554060194085 --- /dev/null +++ b/tools/dataset_converters/pascal_context.py @@ -0,0 +1,87 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
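+"""Convert PASCAL-Context annotations to mmsegmentation format.
+
+Requires the Detail API (the ``detail`` package imported below). A sketch of
+the invocation with placeholder paths; the positional arguments are the VOC
+devkit path and the PASCAL-Context annotation json (see parse_args below):
+
+    python tools/dataset_converters/pascal_context.py data/VOCdevkit \
+        data/VOCdevkit/VOC2010/trainval_merged.json
+"""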
+import argparse +import os.path as osp +from functools import partial + +import numpy as np +from detail import Detail +from mmengine.utils import mkdir_or_exist, track_progress +from PIL import Image + +_mapping = np.sort( + np.array([ + 0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284, + 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59, + 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355, + 85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115 + ])) +_key = np.array(range(len(_mapping))).astype('uint8') + + +def generate_labels(img_id, detail, out_dir): + + def _class_to_index(mask, _mapping, _key): + # assert the values + values = np.unique(mask) + for i in range(len(values)): + assert (values[i] in _mapping) + index = np.digitize(mask.ravel(), _mapping, right=True) + return _key[index].reshape(mask.shape) + + mask = Image.fromarray( + _class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key)) + filename = img_id['file_name'] + mask.save(osp.join(out_dir, filename.replace('jpg', 'png'))) + return osp.splitext(osp.basename(filename))[0] + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert PASCAL VOC annotations to mmsegmentation format') + parser.add_argument('devkit_path', help='pascal voc devkit path') + parser.add_argument('json_path', help='annoation json filepath') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + devkit_path = args.devkit_path + if args.out_dir is None: + out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext') + else: + out_dir = args.out_dir + json_path = args.json_path + mkdir_or_exist(out_dir) + img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages') + + train_detail = Detail(json_path, img_dir, 'train') + train_ids = train_detail.getImgs() + + val_detail = Detail(json_path, img_dir, 'val') + val_ids = val_detail.getImgs() + + mkdir_or_exist( + osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext')) + + train_list = track_progress( + partial(generate_labels, detail=train_detail, out_dir=out_dir), + train_ids) + with open( + osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', + 'train.txt'), 'w') as f: + f.writelines(line + '\n' for line in sorted(train_list)) + + val_list = track_progress( + partial(generate_labels, detail=val_detail, out_dir=out_dir), val_ids) + with open( + osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext', + 'val.txt'), 'w') as f: + f.writelines(line + '\n' for line in sorted(val_list)) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/potsdam.py b/tools/dataset_converters/potsdam.py new file mode 100644 index 0000000000000000000000000000000000000000..f3c713ee2a08d2f6eaf68fb225899504b8f4e829 --- /dev/null +++ b/tools/dataset_converters/potsdam.py @@ -0,0 +1,158 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
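+"""Convert the ISPRS Potsdam dataset to mmsegmentation format.
+
+The zipped tiles are clipped into patches of ``--clip_size`` (512 by default)
+with stride ``--stride_size`` (256 by default) and split into train/val by
+tile id. A sketch of the invocation with a placeholder dataset path:
+
+    python tools/dataset_converters/potsdam.py /path/to/potsdam -o data/potsdam
+"""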
+import argparse +import glob +import math +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv +import numpy as np +from mmengine.utils import ProgressBar, mkdir_or_exist + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert potsdam dataset to mmsegmentation format') + parser.add_argument('dataset_path', help='potsdam folder path') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--clip_size', + type=int, + help='clipped size of image after preparation', + default=512) + parser.add_argument( + '--stride_size', + type=int, + help='stride of clipping original images', + default=256) + args = parser.parse_args() + return args + + +def clip_big_image(image_path, clip_save_dir, args, to_label=False): + # Original image of Potsdam dataset is very large, thus pre-processing + # of them is adopted. Given fixed clip size and stride size to generate + # clipped image, the intersection of width and height is determined. + # For example, given one 5120 x 5120 original image, the clip size is + # 512 and stride size is 256, thus it would generate 20x20 = 400 images + # whose size are all 512x512. + image = mmcv.imread(image_path) + + h, w, c = image.shape + clip_size = args.clip_size + stride_size = args.stride_size + + num_rows = math.ceil((h - clip_size) / stride_size) if math.ceil( + (h - clip_size) / + stride_size) * stride_size + clip_size >= h else math.ceil( + (h - clip_size) / stride_size) + 1 + num_cols = math.ceil((w - clip_size) / stride_size) if math.ceil( + (w - clip_size) / + stride_size) * stride_size + clip_size >= w else math.ceil( + (w - clip_size) / stride_size) + 1 + + x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1)) + xmin = x * clip_size + ymin = y * clip_size + + xmin = xmin.ravel() + ymin = ymin.ravel() + xmin_offset = np.where(xmin + clip_size > w, w - xmin - clip_size, + np.zeros_like(xmin)) + ymin_offset = np.where(ymin + clip_size > h, h - ymin - clip_size, + np.zeros_like(ymin)) + boxes = np.stack([ + xmin + xmin_offset, ymin + ymin_offset, + np.minimum(xmin + clip_size, w), + np.minimum(ymin + clip_size, h) + ], + axis=1) + + if to_label: + color_map = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0], + [255, 255, 0], [0, 255, 0], [0, 255, 255], + [0, 0, 255]]) + flatten_v = np.matmul( + image.reshape(-1, c), + np.array([2, 3, 4]).reshape(3, 1)) + out = np.zeros_like(flatten_v) + for idx, class_color in enumerate(color_map): + value_idx = np.matmul(class_color, + np.array([2, 3, 4]).reshape(3, 1)) + out[flatten_v == value_idx] = idx + image = out.reshape(h, w) + + for box in boxes: + start_x, start_y, end_x, end_y = box + clipped_image = image[start_y:end_y, + start_x:end_x] if to_label else image[ + start_y:end_y, start_x:end_x, :] + idx_i, idx_j = osp.basename(image_path).split('_')[2:4] + mmcv.imwrite( + clipped_image.astype(np.uint8), + osp.join( + clip_save_dir, + f'{idx_i}_{idx_j}_{start_x}_{start_y}_{end_x}_{end_y}.png')) + + +def main(): + args = parse_args() + splits = { + 'train': [ + '2_10', '2_11', '2_12', '3_10', '3_11', '3_12', '4_10', '4_11', + '4_12', '5_10', '5_11', '5_12', '6_10', '6_11', '6_12', '6_7', + '6_8', '6_9', '7_10', '7_11', '7_12', '7_7', '7_8', '7_9' + ], + 'val': [ + '5_15', '6_15', '6_13', '3_13', '4_14', '6_14', '5_14', '2_13', + '4_15', '2_14', '5_13', '4_13', '3_14', '7_13' + ] + } + + dataset_path = args.dataset_path + if args.out_dir is None: + 
out_dir = osp.join('data', 'potsdam') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train')) + mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train')) + mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val')) + + zipp_list = glob.glob(os.path.join(dataset_path, '*.zip')) + print('Find the data', zipp_list) + + for zipp in zipp_list: + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + zip_file = zipfile.ZipFile(zipp) + zip_file.extractall(tmp_dir) + src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif')) + if not len(src_path_list): + sub_tmp_dir = os.path.join(tmp_dir, os.listdir(tmp_dir)[0]) + src_path_list = glob.glob(os.path.join(sub_tmp_dir, '*.tif')) + + prog_bar = ProgressBar(len(src_path_list)) + for i, src_path in enumerate(src_path_list): + idx_i, idx_j = osp.basename(src_path).split('_')[2:4] + data_type = 'train' if f'{idx_i}_{idx_j}' in splits[ + 'train'] else 'val' + if 'label' in src_path: + dst_dir = osp.join(out_dir, 'ann_dir', data_type) + clip_big_image(src_path, dst_dir, args, to_label=True) + else: + dst_dir = osp.join(out_dir, 'img_dir', data_type) + clip_big_image(src_path, dst_dir, args, to_label=False) + prog_bar.update() + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/refuge.py b/tools/dataset_converters/refuge.py new file mode 100644 index 0000000000000000000000000000000000000000..1186866ab3fd58c4d72e5f573938053a8d7c80b2 --- /dev/null +++ b/tools/dataset_converters/refuge.py @@ -0,0 +1,110 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import os.path as osp +import tempfile +import zipfile + +import mmcv +import numpy as np +from mmengine.utils import mkdir_or_exist + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert REFUGE dataset to mmsegmentation format') + parser.add_argument('--raw_data_root', help='the root path of raw data') + + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def extract_img(root: str, + cur_dir: str, + out_dir: str, + mode: str = 'train', + file_type: str = 'img') -> None: + """_summary_ + + Args: + Args: + root (str): root where the extracted data is saved + cur_dir (cur_dir): dir where the zip_file exists + out_dir (str): root dir where the data is saved + + mode (str, optional): Defaults to 'train'. + file_type (str, optional): Defaults to 'img',else to 'mask'. 
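+
+    When ``file_type`` is 'annotations', the mask pixel values are remapped
+    below (0 -> 1, 128 -> 2, 255 -> 0) before the PNG is written.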
+ """ + zip_file = zipfile.ZipFile(cur_dir) + zip_file.extractall(root) + for cur_dir, dirs, files in os.walk(root): + # filter child dirs and directories with "Illustration" and "MACOSX" + if len(dirs) == 0 and \ + cur_dir.split('\\')[-1].find('Illustration') == -1 and \ + cur_dir.find('MACOSX') == -1: + + file_names = [ + file for file in files + if file.endswith('.jpg') or file.endswith('.bmp') + ] + for filename in sorted(file_names): + img = mmcv.imread(osp.join(cur_dir, filename)) + + if file_type == 'annotations': + img = img[:, :, 0] + img[np.where(img == 0)] = 1 + img[np.where(img == 128)] = 2 + img[np.where(img == 255)] = 0 + mmcv.imwrite( + img, + osp.join(out_dir, file_type, mode, + osp.splitext(filename)[0] + '.png')) + + +def main(): + args = parse_args() + + raw_data_root = args.raw_data_root + if args.out_dir is None: + out_dir = osp.join('./data', 'REFUGE') + + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(out_dir) + mkdir_or_exist(osp.join(out_dir, 'images')) + mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'images', 'test')) + mkdir_or_exist(osp.join(out_dir, 'annotations')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'test')) + + print('Generating images and annotations...') + # process data from the child dir on the first rank + cur_dir, dirs, files = list(os.walk(raw_data_root))[0] + print('====================') + + files = list(filter(lambda x: x.endswith('.zip'), files)) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + for file in files: + # search data folders for training,validation,test + mode = list( + filter(lambda x: file.lower().find(x) != -1, + ['training', 'test', 'validation']))[0] + file_root = osp.join(tmp_dir, file[:-4]) + file_type = 'images' if file.find('Anno') == -1 and file.find( + 'GT') == -1 else 'annotations' + extract_img(file_root, osp.join(cur_dir, file), out_dir, mode, + file_type) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/stare.py b/tools/dataset_converters/stare.py new file mode 100644 index 0000000000000000000000000000000000000000..4a23ba4dd8a4744bca9d1a506c79131c0e42c73d --- /dev/null +++ b/tools/dataset_converters/stare.py @@ -0,0 +1,167 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
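+"""Convert the STARE dataset to mmsegmentation format.
+
+A sketch of the invocation with placeholder paths; the three positional
+arguments are the image and label archives listed in parse_args below:
+
+    python tools/dataset_converters/stare.py stare-images.tar labels-ah.tar \
+        labels-vk.tar -o data/STARE
+"""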
+import argparse +import gzip +import os +import os.path as osp +import tarfile +import tempfile + +import mmcv +from mmengine.utils import mkdir_or_exist + +STARE_LEN = 20 +TRAINING_LEN = 10 + + +def un_gz(src, dst): + g_file = gzip.GzipFile(src) + with open(dst, 'wb+') as f: + f.write(g_file.read()) + g_file.close() + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert STARE dataset to mmsegmentation format') + parser.add_argument('image_path', help='the path of stare-images.tar') + parser.add_argument('labels_ah', help='the path of labels-ah.tar') + parser.add_argument('labels_vk', help='the path of labels-vk.tar') + parser.add_argument('--tmp_dir', help='path of the temporary directory') + parser.add_argument('-o', '--out_dir', help='output path') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + image_path = args.image_path + labels_ah = args.labels_ah + labels_vk = args.labels_vk + if args.out_dir is None: + out_dir = osp.join('data', 'STARE') + else: + out_dir = args.out_dir + + print('Making directories...') + mkdir_or_exist(out_dir) + mkdir_or_exist(osp.join(out_dir, 'images')) + mkdir_or_exist(osp.join(out_dir, 'images', 'training')) + mkdir_or_exist(osp.join(out_dir, 'images', 'validation')) + mkdir_or_exist(osp.join(out_dir, 'annotations')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'training')) + mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation')) + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting stare-images.tar...') + with tarfile.open(image_path) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + un_gz( + osp.join(tmp_dir, 'gz', filename), + osp.join(tmp_dir, 'files', + osp.splitext(filename)[0])) + + now_dir = osp.join(tmp_dir, 'files') + + assert len(os.listdir(now_dir)) == STARE_LEN, \ + f'len(os.listdir(now_dir)) != {STARE_LEN}' + + for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'training', + osp.splitext(filename)[0] + '.png')) + + for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img, + osp.join(out_dir, 'images', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Removing the temporary files...') + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting labels-ah.tar...') + with tarfile.open(labels_ah) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + un_gz( + osp.join(tmp_dir, 'gz', filename), + osp.join(tmp_dir, 'files', + osp.splitext(filename)[0])) + + now_dir = osp.join(tmp_dir, 'files') + + assert len(os.listdir(now_dir)) == STARE_LEN, \ + f'len(os.listdir(now_dir)) != {STARE_LEN}' + + for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(now_dir, filename)) + # The annotation img should be divided by 128, because some of + # the annotation imgs are not standard. We should set a threshold + # to convert the nonstandard annotation imgs. 
The value divided by + # 128 equivalent to '1 if value >= 128 else 0' + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(filename)[0] + '.png')) + + for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Removing the temporary files...') + + with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: + mkdir_or_exist(osp.join(tmp_dir, 'gz')) + mkdir_or_exist(osp.join(tmp_dir, 'files')) + + print('Extracting labels-vk.tar...') + with tarfile.open(labels_vk) as f: + f.extractall(osp.join(tmp_dir, 'gz')) + + for filename in os.listdir(osp.join(tmp_dir, 'gz')): + un_gz( + osp.join(tmp_dir, 'gz', filename), + osp.join(tmp_dir, 'files', + osp.splitext(filename)[0])) + + now_dir = osp.join(tmp_dir, 'files') + + assert len(os.listdir(now_dir)) == STARE_LEN, \ + f'len(os.listdir(now_dir)) != {STARE_LEN}' + + for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'training', + osp.splitext(filename)[0] + '.png')) + + for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]: + img = mmcv.imread(osp.join(now_dir, filename)) + mmcv.imwrite( + img[:, :, 0] // 128, + osp.join(out_dir, 'annotations', 'validation', + osp.splitext(filename)[0] + '.png')) + + print('Removing the temporary files...') + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/synapse.py b/tools/dataset_converters/synapse.py new file mode 100644 index 0000000000000000000000000000000000000000..42dac6b7eff94107b8b3a59984622cb1fd2e7599 --- /dev/null +++ b/tools/dataset_converters/synapse.py @@ -0,0 +1,155 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp + +import nibabel as nib +import numpy as np +from mmengine.utils import mkdir_or_exist +from PIL import Image + + +def read_files_from_txt(txt_path): + with open(txt_path) as f: + files = f.readlines() + files = [file.strip() for file in files] + return files + + +def read_nii_file(nii_path): + img = nib.load(nii_path).get_fdata() + return img + + +def split_3d_image(img): + c, _, _ = img.shape + res = [] + for i in range(c): + res.append(img[i, :, :]) + return res + + +def label_mapping(label): + """Label mapping from TransUNet paper setting. It only has 9 classes, which + are 'background', 'aorta', 'gallbladder', 'left_kidney', 'right_kidney', + 'liver', 'pancreas', 'spleen', 'stomach', respectively. Other foreground + classes in original dataset are all set to background. 
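+
+    Concretely, the mapping below gives: 0 background, 1 aorta (original
+    label 8), 2 gallbladder (4), 3 left kidney (3), 4 right kidney (2),
+    5 liver (6), 6 pancreas (11), 7 spleen (1) and 8 stomach (7).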
+ + More details could be found here: https://arxiv.org/abs/2102.04306 + """ + maped_label = np.zeros_like(label) + maped_label[label == 8] = 1 + maped_label[label == 4] = 2 + maped_label[label == 3] = 3 + maped_label[label == 2] = 4 + maped_label[label == 6] = 5 + maped_label[label == 11] = 6 + maped_label[label == 1] = 7 + maped_label[label == 7] = 8 + return maped_label + + +def pares_args(): + parser = argparse.ArgumentParser( + description='Convert synapse dataset to mmsegmentation format') + parser.add_argument( + '--dataset-path', type=str, help='synapse dataset path.') + parser.add_argument( + '--save-path', + default='data/synapse', + type=str, + help='save path of the dataset.') + args = parser.parse_args() + return args + + +def main(): + args = pares_args() + dataset_path = args.dataset_path + save_path = args.save_path + + if not osp.exists(dataset_path): + raise ValueError('The dataset path does not exist. ' + 'Please enter a correct dataset path.') + if not osp.exists(osp.join(dataset_path, 'img')) \ + or not osp.exists(osp.join(dataset_path, 'label')): + raise FileNotFoundError('The dataset structure is incorrect. ' + 'Please check your dataset.') + + train_id = read_files_from_txt(osp.join(dataset_path, 'train.txt')) + train_id = [idx[3:7] for idx in train_id] + + test_id = read_files_from_txt(osp.join(dataset_path, 'val.txt')) + test_id = [idx[3:7] for idx in test_id] + + mkdir_or_exist(osp.join(save_path, 'img_dir/train')) + mkdir_or_exist(osp.join(save_path, 'img_dir/val')) + mkdir_or_exist(osp.join(save_path, 'ann_dir/train')) + mkdir_or_exist(osp.join(save_path, 'ann_dir/val')) + + # It follows data preparation pipeline from here: + # https://github.com/Beckschen/TransUNet/tree/main/datasets + for i, idx in enumerate(train_id): + img_3d = read_nii_file( + osp.join(dataset_path, 'img', 'img' + idx + '.nii.gz')) + label_3d = read_nii_file( + osp.join(dataset_path, 'label', 'label' + idx + '.nii.gz')) + + img_3d = np.clip(img_3d, -125, 275) + img_3d = (img_3d + 125) / 400 + img_3d *= 255 + img_3d = np.transpose(img_3d, [2, 0, 1]) + img_3d = np.flip(img_3d, 2) + + label_3d = np.transpose(label_3d, [2, 0, 1]) + label_3d = np.flip(label_3d, 2) + label_3d = label_mapping(label_3d) + + for c in range(img_3d.shape[0]): + img = img_3d[c] + label = label_3d[c] + + img = Image.fromarray(img).convert('RGB') + label = Image.fromarray(label).convert('L') + img.save( + osp.join( + save_path, 'img_dir/train', 'case' + idx.zfill(4) + + '_slice' + str(c).zfill(3) + '.jpg')) + label.save( + osp.join( + save_path, 'ann_dir/train', 'case' + idx.zfill(4) + + '_slice' + str(c).zfill(3) + '.png')) + + for i, idx in enumerate(test_id): + img_3d = read_nii_file( + osp.join(dataset_path, 'img', 'img' + idx + '.nii.gz')) + label_3d = read_nii_file( + osp.join(dataset_path, 'label', 'label' + idx + '.nii.gz')) + + img_3d = np.clip(img_3d, -125, 275) + img_3d = (img_3d + 125) / 400 + img_3d *= 255 + img_3d = np.transpose(img_3d, [2, 0, 1]) + img_3d = np.flip(img_3d, 2) + + label_3d = np.transpose(label_3d, [2, 0, 1]) + label_3d = np.flip(label_3d, 2) + label_3d = label_mapping(label_3d) + + for c in range(img_3d.shape[0]): + img = img_3d[c] + label = label_3d[c] + + img = Image.fromarray(img).convert('RGB') + label = Image.fromarray(label).convert('L') + img.save( + osp.join( + save_path, 'img_dir/val', 'case' + idx.zfill(4) + + '_slice' + str(c).zfill(3) + '.jpg')) + label.save( + osp.join( + save_path, 'ann_dir/val', 'case' + idx.zfill(4) + + '_slice' + str(c).zfill(3) + '.png')) + + +if __name__ 
== '__main__': + main() diff --git a/tools/dataset_converters/voc_aug.py b/tools/dataset_converters/voc_aug.py new file mode 100644 index 0000000000000000000000000000000000000000..a536f4290d06e4a6c3c9fa8dbadfda847fec583b --- /dev/null +++ b/tools/dataset_converters/voc_aug.py @@ -0,0 +1,92 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from functools import partial + +import numpy as np +from mmengine.utils import mkdir_or_exist, scandir, track_parallel_progress +from PIL import Image +from scipy.io import loadmat + +AUG_LEN = 10582 + + +def convert_mat(mat_file, in_dir, out_dir): + data = loadmat(osp.join(in_dir, mat_file)) + mask = data['GTcls'][0]['Segmentation'][0].astype(np.uint8) + seg_filename = osp.join(out_dir, mat_file.replace('.mat', '.png')) + Image.fromarray(mask).save(seg_filename, 'PNG') + + +def generate_aug_list(merged_list, excluded_list): + return list(set(merged_list) - set(excluded_list)) + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert PASCAL VOC annotations to mmsegmentation format') + parser.add_argument('devkit_path', help='pascal voc devkit path') + parser.add_argument('aug_path', help='pascal voc aug path') + parser.add_argument('-o', '--out_dir', help='output path') + parser.add_argument( + '--nproc', default=1, type=int, help='number of process') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + devkit_path = args.devkit_path + aug_path = args.aug_path + nproc = args.nproc + if args.out_dir is None: + out_dir = osp.join(devkit_path, 'VOC2012', 'SegmentationClassAug') + else: + out_dir = args.out_dir + mkdir_or_exist(out_dir) + in_dir = osp.join(aug_path, 'dataset', 'cls') + + track_parallel_progress( + partial(convert_mat, in_dir=in_dir, out_dir=out_dir), + list(scandir(in_dir, suffix='.mat')), + nproc=nproc) + + full_aug_list = [] + with open(osp.join(aug_path, 'dataset', 'train.txt')) as f: + full_aug_list += [line.strip() for line in f] + with open(osp.join(aug_path, 'dataset', 'val.txt')) as f: + full_aug_list += [line.strip() for line in f] + + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', + 'train.txt')) as f: + ori_train_list = [line.strip() for line in f] + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', + 'val.txt')) as f: + val_list = [line.strip() for line in f] + + aug_train_list = generate_aug_list(ori_train_list + full_aug_list, + val_list) + assert len(aug_train_list) == AUG_LEN, 'len(aug_train_list) != {}'.format( + AUG_LEN) + + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', + 'trainaug.txt'), 'w') as f: + f.writelines(line + '\n' for line in aug_train_list) + + aug_list = generate_aug_list(full_aug_list, ori_train_list + val_list) + assert len(aug_list) == AUG_LEN - len( + ori_train_list), 'len(aug_list) != {}'.format(AUG_LEN - + len(ori_train_list)) + with open( + osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 'aug.txt'), + 'w') as f: + f.writelines(line + '\n' for line in aug_list) + + print('Done!') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_tools/create_dataset.py b/tools/dataset_tools/create_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..7f65eaa6c432529801f550f2b79aa39299940c29 --- /dev/null +++ b/tools/dataset_tools/create_dataset.py @@ -0,0 +1,185 @@ +import os +from glob import glob +from typing import List, Literal +import shutil +from PIL import Image +import json +import numpy as np +from 
rich.progress import track +import cv2 +from vegseg.datasets import GrassDataset +from sklearn.model_selection import train_test_split +import argparse + + +def give_color_to_mask(mask: np.ndarray, palette: List[int]) -> Image.Image: + """ + Convert mask to color image + Args: + mask (np.ndarray): numpy array of shape (H, W) + palette (List[int]): list of RGB values + return: + color_mask (Image.Image): PIL Image of shape (H, W) + """ + im = Image.fromarray(mask).convert("P") + im.putpalette(palette) + # exit(0) + return im + + +def get_mask_by_json(filename: str) -> np.ndarray: + """ + Convert json to mask + Args: + filename (str): path to json file + return: + mask (np.ndarray): numpy array of shape (H, W) + """ + + json_file = json.load(open(filename)) + img_height = json_file["imageHeight"] + img_width = json_file["imageWidth"] + mask = np.zeros((img_height, img_width), dtype="int8") + for shape in json_file["shapes"]: + label = int(shape["label"]) + label -= 1 + label = max(label, 0) + points = np.array(shape["points"]).astype(np.int32) + cv2.fillPoly(mask, [points], label) + return mask + + +def json_to_image(json_path, image_path): + """ + Convert json to image + Args: + json_path (str): path to json file + image_path (str): path to save image + return: None + """ + mask = get_mask_by_json(json_path) + palette_list = GrassDataset.METAINFO["palette"] + palette = [] + for palette_item in palette_list: + palette.extend(palette_item) + color_mask = give_color_to_mask(mask, palette) + color_mask.save(image_path) + + +def create_dataset( + image_paths: List[str], + ann_paths: List[str], + phase: Literal["train", "val"], + output_dir: str, +): + """ + Args: + image_paths (List[str]): list of image paths + ann_paths (List[str]): list of annotation paths + phase (Literal["train", "val"]): train or val + output_dir (str): path to save dataset + Return: + None + """ + for image_path, ann_path in track( + zip(image_paths, ann_paths), + description=f"{phase} dataset", + total=len(image_paths), + ): + ann_save_path = os.path.join( + output_dir, + "ann_dir", + phase, + os.path.basename(ann_path).replace(".json", ".png"), + ) + + # 将image复制到指定路径 + new_image_path = os.path.join( + output_dir, "img_dir", phase, os.path.basename(image_path) + ) + shutil.copy(image_path, new_image_path) + + # 将ann保存到指定路径 + json_to_image(ann_path, ann_save_path) + + +def split_dataset( + root_path: str, + output_path: str, + split_ratio: float = 0.8, + shuffle: bool = True, + seed: int = 42, +) -> None: + """ + Split a dataset into train, test, and validation sets. + + Args: + root_path (str): Path to the dataset. The dataset should be organized as follows: + dataset_path/ + image1.tif + image2.tif + ... + imageN.tif + label1.tif + label2.tif + ... + labelN.tif + output_path (str): Path to the output directory where the split dataset will be saved. + split_ratio (float, optional): Ratio of the dataset to be used for training. Defaults to 0.8. + seed (int, optional): Seed for the random number generator. Defaults to 42. 
+ """ + image_paths = glob(os.path.join(root_path, "*.tif")) + ann_paths = [filename.replace("tif", "json") for filename in image_paths] + assert len(image_paths) == len( + ann_paths + ), "Number of images and annotations do not match" + print(f"images: {len(image_paths)}, annotations: {len(ann_paths)}") + + image_train, image_test, ann_train, ann_test = train_test_split( + image_paths, + ann_paths, + train_size=split_ratio, + random_state=seed, + shuffle=shuffle, + ) + print(f"train: {len(image_train)}, test: {len(image_test)}") + + os.makedirs(os.path.join(output_path, "img_dir", "train"), exist_ok=True) + os.makedirs(os.path.join(output_path, "img_dir", "val"), exist_ok=True) + os.makedirs(os.path.join(output_path, "ann_dir", "train"), exist_ok=True) + os.makedirs(os.path.join(output_path, "ann_dir", "val"), exist_ok=True) + + create_dataset(image_train, ann_train, "train", output_path) + create_dataset(image_test, ann_test, "val", output_path) + + +def main(): + args = argparse.ArgumentParser() + args.add_argument("--root", type=str, default="data/raw_data") + args.add_argument("--output", type=str, default="data/grass") + args.add_argument("--split_ratio", type=float, default=0.8) + args.add_argument("--seed", type=int, default=42) + args.add_argument("--shuffle", type=bool, default=True) + args = args.parse_args() + + root: str = args.root + output_path: str = args.output + split_ratio: float = args.split_ratio + seed: int = args.seed + shuffle: bool = args.shuffle + + split_dataset( + root_path=root, + output_path=output_path, + split_ratio=split_ratio, + shuffle=shuffle, + seed=seed, + ) + + print("数据集划分完成") + + +if __name__ == "__main__": + + # 使用示例 : python src/tools/split_dataset.py --root data/raw_data --output data/grass --split_ratio 0.8 --seed 42 --shuffle True + main() diff --git a/tools/dataset_tools/dataset_show.py b/tools/dataset_tools/dataset_show.py new file mode 100644 index 0000000000000000000000000000000000000000..0969e5d52785e899d8e7868c849cc43dbe581a95 --- /dev/null +++ b/tools/dataset_tools/dataset_show.py @@ -0,0 +1,109 @@ +from glob import glob +import argparse +import os +from typing import Tuple, List +from PIL import Image +from rich.progress import track +from vegseg.datasets import GrassDataset + + +def get_args() -> Tuple[str, str, int]: + """ + get args + return: + --dataset_path: dataset path. + --output_path: output path for saving. + --num: num of image to show. -1 means all. + """ + parser = argparse.ArgumentParser() + parser.add_argument("--dataset_path", type=str, default="data/grass") + parser.add_argument("--output_path", type=str, default="all_dataset.png") + parser.add_argument("--num", default=-1, type=int, help="num of image to show") + args = parser.parse_args() + return args.dataset_path, args.output_path, args.num + + +def get_image_and_mask_paths( + dataset_path: str, num: int +) -> Tuple[List[str], List[str]]: + """ + get image and mask paths from dataset path. + return: + image_paths: list of image paths. + mask_paths: list of mask paths. + """ + image_paths = glob(os.path.join(dataset_path, "img_dir", "*", "*.tif")) + if num != -1: + image_paths = image_paths[:num] + mask_paths = [ + filename.replace("tif", "png").replace("img_dir", "ann_dir") + for filename in image_paths + ] + return image_paths, mask_paths + + +def get_palette() -> List[int]: + """ + get palette of dataset. + return: + palette: list of palette. 
+ """ + palette = [] + palette_list = GrassDataset.METAINFO["palette"] + for palette_item in palette_list: + palette.extend(palette_item) + return palette + + +def paste_image_mask(image_path: str, mask_path: str) -> Image.Image: + """ + paste image and mask together + Args: + image_path (str): path to image. + mask_path (str): path to mask. + return: + image_mask: image with mask,is Image. + """ + image = Image.open(image_path) + mask = Image.open(mask_path).convert("P") + palette = get_palette() + mask.putpalette(palette) + mask = mask.convert("RGB") + image_mask = Image.new("RGB", (image.width * 2, image.height)) + image_mask.paste(image, (0, 0)) + image_mask.paste(mask, (image.width, 0)) + return image_mask + + +def paste_all_images(all_images: List[Image.Image], output_path: str) -> None: + """ + paste all images together and save it. + Args: + all_images (List[Image.Image]): list of image. + output_path (str): path to save. + Return: + None + """ + widths = [image.width for image in all_images] + heights = [image.height for image in all_images] + width = max(widths) + height = sum(heights) + all_image = Image.new("RGB", (width, height)) + for i, image in enumerate(all_images): + all_image.paste(image, (0, sum(heights[:i]))) + all_image.save(output_path) + + +def main(): + dataset_path, output_path, num = get_args() + image_paths, mask_paths = get_image_and_mask_paths(dataset_path, num) + all_images = [] + for image_path, mask_path in zip(image_paths, mask_paths): + image_mask = paste_image_mask(image_path, mask_path) + all_images.append(image_mask) + paste_all_images(all_images, output_path) + + +if __name__ == "__main__": + # example usage: python tools/dataset_tools/dataset_show.py --dataset_path data/grass --output_path all_dataset.png + main() diff --git a/tools/dist_test.sh b/tools/dist_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..89711fd5c02cfc1f0386e5354506d4b74ecac251 --- /dev/null +++ b/tools/dist_test.sh @@ -0,0 +1,20 @@ +CONFIG=$1 +CHECKPOINT=$2 +GPUS=$3 +NNODES=${NNODES:-1} +NODE_RANK=${NODE_RANK:-0} +PORT=${PORT:-29500} +MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch \ + --nnodes=$NNODES \ + --node_rank=$NODE_RANK \ + --master_addr=$MASTER_ADDR \ + --nproc_per_node=$GPUS \ + --master_port=$PORT \ + $(dirname "$0")/test.py \ + $CONFIG \ + $CHECKPOINT \ + --launcher pytorch \ + ${@:4} diff --git a/tools/dist_train.sh b/tools/dist_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..a857df78788edb8841b6f67d74dd0e6cfb77d8ab --- /dev/null +++ b/tools/dist_train.sh @@ -0,0 +1,17 @@ +CONFIG=$1 +GPUS=$2 +NNODES=${NNODES:-1} +NODE_RANK=${NODE_RANK:-0} +PORT=${PORT:-29500} +MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +python -m torch.distributed.launch \ + --nnodes=$NNODES \ + --node_rank=$NODE_RANK \ + --master_addr=$MASTER_ADDR \ + --nproc_per_node=$GPUS \ + --master_port=$PORT \ + $(dirname "$0")/train.py \ + $CONFIG \ + --launcher pytorch ${@:3} diff --git a/tools/extract_weight.py b/tools/extract_weight.py new file mode 100644 index 0000000000000000000000000000000000000000..97e6f423d84d23708bb1775fa5a04cea519b72d4 --- /dev/null +++ b/tools/extract_weight.py @@ -0,0 +1,44 @@ +import torch +import os +import argparse + + +def get_args(): + parse = argparse.ArgumentParser() + parse.add_argument("--weight_path", type=str) + parse.add_argument("--save_path", type=str) + args = parse.parse_args() + return 
args.weight_path,args.save_path, + + +def main(): + weight_path, save_path = get_args() + weight = torch.load(weight_path, map_location="cpu") + state_dict = weight["state_dict"] + head_state_dict = {} + auxiliary_head_dict = {} + backbone_dict = {} + neck_dict = {} + student_adapter_dict = {} + for k, v in state_dict.items(): + if "decode_head" in k: + head_state_dict[k] = v + elif "auxiliary_head" in k: + auxiliary_head_dict[k] = v + elif "backbone" in k: + backbone_dict[k] = v + elif "neck" in k: + neck_dict[k] = v + elif "student_adapter" in k: + student_adapter_dict[k] = v + else: + raise ValueError(f"unexpected keys:{k}") + torch.save(head_state_dict, os.path.join(save_path,"head.pth")) + torch.save(auxiliary_head_dict, os.path.join(save_path,"auxiliary_head.pth")) + torch.save(backbone_dict, os.path.join(save_path,"backbone.pth")) + torch.save(neck_dict, os.path.join(save_path,"neck.pth")) + torch.save(student_adapter_dict, os.path.join(save_path,"student_adapter.pth")) + + +if __name__ == "__main__": + main() diff --git a/tools/misc/browse_dataset.py b/tools/misc/browse_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..7863eb74f2cab53d025afad347f7886a5ce29919 --- /dev/null +++ b/tools/misc/browse_dataset.py @@ -0,0 +1,73 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp + +from mmengine import Config, DictAction +from mmengine.registry import init_default_scope +from mmengine.utils import ProgressBar + +from mmseg.registry import DATASETS, VISUALIZERS + + +def parse_args(): + parser = argparse.ArgumentParser(description='Browse a dataset') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--output-dir', + default=None, + type=str, + help='If there is no display interface, you can save it') + parser.add_argument('--not-show', default=False, action='store_true') + parser.add_argument( + '--show-interval', + type=float, + default=2, + help='the interval of show (s)') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # register all modules in mmseg into the registries + init_default_scope('mmseg') + + dataset = DATASETS.build(cfg.train_dataloader.dataset) + cfg.visualizer['save_dir'] = args.output_dir + visualizer = VISUALIZERS.build(cfg.visualizer) + visualizer.dataset_meta = dataset.METAINFO + + progress_bar = ProgressBar(len(dataset)) + for item in dataset: + img = item['inputs'].permute(1, 2, 0).numpy() + data_sample = item['data_samples'].numpy() + img_path = osp.basename(item['data_samples'].img_path) + + img = img[..., [2, 1, 0]] # bgr to rgb + + visualizer.add_datasample( + osp.basename(img_path), + img, + data_sample, + show=not args.not_show, + wait_time=args.show_interval) + + progress_bar.update() + + +if __name__ == '__main__': + main() diff --git a/tools/misc/print_config.py b/tools/misc/print_config.py new file mode 100644 index 0000000000000000000000000000000000000000..2a1c024a6a44157a0b0d4d6213d18d67f57a33c5 --- /dev/null +++ b/tools/misc/print_config.py @@ -0,0 +1,69 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import warnings + +from mmengine import Config, DictAction + +from mmseg.apis import init_model + + +def parse_args(): + parser = argparse.ArgumentParser(description='Print the whole config') + parser.add_argument('config', help='config file path') + parser.add_argument( + '--graph', action='store_true', help='print the models graph') + parser.add_argument( + '--options', + nargs='+', + action=DictAction, + help="--options is deprecated in favor of --cfg_options' and it will " + 'not be supported in version v0.22.0. Override some settings in the ' + 'used config, the key-value pair in xxx=yyy format will be merged ' + 'into config file. If the value to be overwritten is a list, it ' + 'should be like key="[a,b]" or key=a,b It also allows nested ' + 'list/tuple values, e.g. key="[(a,b),(c,d)]" Note that the quotation ' + 'marks are necessary and that no white space is allowed.') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + + if args.options and args.cfg_options: + raise ValueError( + '--options and --cfg-options cannot be both ' + 'specified, --options is deprecated in favor of --cfg-options. 
' + '--options will not be supported in version v0.22.0.') + if args.options: + warnings.warn('--options is deprecated in favor of --cfg-options, ' + '--options will not be supported in version v0.22.0.') + args.cfg_options = args.options + + return args + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + print(f'Config:\n{cfg.pretty_text}') + # dump config + cfg.dump('example.py') + # dump models graph + if args.graph: + model = init_model(args.config, device='cpu') + print(f'Model graph:\n{str(model)}') + with open('example-graph.txt', 'w') as f: + f.writelines(str(model)) + + +if __name__ == '__main__': + main() diff --git a/tools/misc/publish_model.py b/tools/misc/publish_model.py new file mode 100644 index 0000000000000000000000000000000000000000..e035ad90e85e0e03d8304c1d5b524c5ac322c644 --- /dev/null +++ b/tools/misc/publish_model.py @@ -0,0 +1,50 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import subprocess +from hashlib import sha256 + +import torch + +BLOCK_SIZE = 128 * 1024 + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Process a checkpoint to be published') + parser.add_argument('in_file', help='input checkpoint filename') + parser.add_argument('out_file', help='output checkpoint filename') + args = parser.parse_args() + return args + + +def sha256sum(filename: str) -> str: + """Compute SHA256 message digest from a file.""" + hash_func = sha256() + byte_array = bytearray(BLOCK_SIZE) + memory_view = memoryview(byte_array) + with open(filename, 'rb', buffering=0) as file: + for block in iter(lambda: file.readinto(memory_view), 0): + hash_func.update(memory_view[:block]) + return hash_func.hexdigest() + + +def process_checkpoint(in_file, out_file): + checkpoint = torch.load(in_file, map_location='cpu') + # remove optimizer for smaller file size + if 'optimizer' in checkpoint: + del checkpoint['optimizer'] + # if it is necessary to remove some sensitive data in checkpoint['meta'], + # add the code here. + torch.save(checkpoint, out_file) + sha = sha256sum(in_file) + final_file = out_file.rstrip('.pth') + f'-{sha[:8]}.pth' + subprocess.Popen(['mv', out_file, final_file]) + + +def main(): + args = parse_args() + process_checkpoint(args.in_file, args.out_file) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/clip2mmseg.py b/tools/model_converters/clip2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..9a97e4b04ab45740ee37149d30a85b67245868f5 --- /dev/null +++ b/tools/model_converters/clip2mmseg.py @@ -0,0 +1,163 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
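+# This converter remaps OpenAI CLIP ViT-B/16 or ViT-L/14 checkpoints into the
+# MMSegmentation key layout produced below (image_encoder / text_encoder /
+# decode_head.rec_with_attnbias). Example invocation, with illustrative
+# checkpoint paths only (these files are assumptions, not shipped here):
+#   python tools/model_converters/clip2mmseg.py ViT-B-16.pt pretrain/clip_vitb16_mmseg.pth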
+import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_vitlayer(paras): + new_para_name = '' + if paras[0] == 'ln_1': + new_para_name = '.'.join(['ln1'] + paras[1:]) + elif paras[0] == 'attn': + new_para_name = '.'.join(['attn.attn'] + paras[1:]) + elif paras[0] == 'ln_2': + new_para_name = '.'.join(['ln2'] + paras[1:]) + elif paras[0] == 'mlp': + if paras[1] == 'c_fc': + new_para_name = '.'.join(['ffn.layers.0.0'] + paras[-1:]) + else: + new_para_name = '.'.join(['ffn.layers.1'] + paras[-1:]) + else: + print(f'Wrong for {paras}') + return new_para_name + + +def convert_translayer(paras): + new_para_name = '' + if paras[0] == 'attn': + new_para_name = '.'.join(['attentions.0.attn'] + paras[1:]) + elif paras[0] == 'ln_1': + new_para_name = '.'.join(['norms.0'] + paras[1:]) + elif paras[0] == 'ln_2': + new_para_name = '.'.join(['norms.1'] + paras[1:]) + elif paras[0] == 'mlp': + if paras[1] == 'c_fc': + new_para_name = '.'.join(['ffns.0.layers.0.0'] + paras[2:]) + elif paras[1] == 'c_proj': + new_para_name = '.'.join(['ffns.0.layers.1'] + paras[2:]) + else: + print(f'Wrong for {paras}') + else: + print(f'Wrong for {paras}') + return new_para_name + + +def convert_key_name(ckpt, visual_split): + new_ckpt = OrderedDict() + for k, v in ckpt.items(): + key_list = k.split('.') + if key_list[0] == 'visual': + new_transform_name = 'image_encoder' + if key_list[1] == 'class_embedding': + new_name = '.'.join([new_transform_name, 'cls_token']) + elif key_list[1] == 'positional_embedding': + new_name = '.'.join([new_transform_name, 'pos_embed']) + elif key_list[1] == 'conv1': + new_name = '.'.join([ + new_transform_name, 'patch_embed.projection', key_list[2] + ]) + elif key_list[1] == 'ln_pre': + new_name = '.'.join( + [new_transform_name, key_list[1], key_list[2]]) + elif key_list[1] == 'transformer': + new_layer_name = 'layers' + layer_index = key_list[3] + paras = key_list[4:] + if int(layer_index) < visual_split: + new_para_name = convert_vitlayer(paras) + new_name = '.'.join([ + new_transform_name, new_layer_name, layer_index, + new_para_name + ]) + else: + new_para_name = convert_translayer(paras) + new_transform_name = 'decode_head.rec_with_attnbias' + new_layer_name = 'layers' + layer_index = str(int(layer_index) - visual_split) + new_name = '.'.join([ + new_transform_name, new_layer_name, layer_index, + new_para_name + ]) + elif key_list[1] == 'proj': + new_name = 'decode_head.rec_with_attnbias.proj.weight' + elif key_list[1] == 'ln_post': + new_name = k.replace('visual', 'decode_head.rec_with_attnbias') + else: + print(f'pop parameter: {k}') + continue + else: + text_encoder_name = 'text_encoder' + if key_list[0] == 'transformer': + layer_name = 'transformer' + layer_index = key_list[2] + paras = key_list[3:] + new_para_name = convert_translayer(paras) + new_name = '.'.join([ + text_encoder_name, layer_name, layer_index, new_para_name + ]) + elif key_list[0] in [ + 'positional_embedding', 'text_projection', 'bg_embed', + 'attn_mask', 'logit_scale', 'token_embedding', 'ln_final' + ]: + new_name = 'text_encoder.' 
+ k + else: + print(f'pop parameter: {k}') + continue + new_ckpt[new_name] = v + + return new_ckpt + + +def convert_tensor(ckpt): + cls_token = ckpt['image_encoder.cls_token'] + new_cls_token = cls_token.unsqueeze(0).unsqueeze(0) + ckpt['image_encoder.cls_token'] = new_cls_token + pos_embed = ckpt['image_encoder.pos_embed'] + new_pos_embed = pos_embed.unsqueeze(0) + ckpt['image_encoder.pos_embed'] = new_pos_embed + proj_weight = ckpt['decode_head.rec_with_attnbias.proj.weight'] + new_proj_weight = proj_weight.transpose(1, 0) + ckpt['decode_head.rec_with_attnbias.proj.weight'] = new_proj_weight + return ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in timm pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + if any([s in args.src for s in ['B-16', 'b16', 'base_patch16']]): + visual_split = 9 + elif any([s in args.src for s in ['L-14', 'l14', 'large_patch14']]): + visual_split = 18 + else: + print('Make sure the clip model is ViT-B/16 or ViT-L/14!') + visual_split = -1 + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if isinstance(checkpoint, torch.jit.RecursiveScriptModule): + state_dict = checkpoint.state_dict() + else: + if 'state_dict' in checkpoint: + # timm checkpoint + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + # deit checkpoint + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_key_name(state_dict, visual_split) + weight = convert_tensor(weight) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/mit2mmseg.py b/tools/model_converters/mit2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..f10cbbf9d40d3656be0d447460c12fc83771c14c --- /dev/null +++ b/tools/model_converters/mit2mmseg.py @@ -0,0 +1,82 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_mit(ckpt): + new_ckpt = OrderedDict() + # Process the concat between q linear weights and kv linear weights + for k, v in ckpt.items(): + if k.startswith('head'): + continue + # patch embedding conversion + elif k.startswith('patch_embed'): + stage_i = int(k.split('.')[0].replace('patch_embed', '')) + new_k = k.replace(f'patch_embed{stage_i}', f'layers.{stage_i-1}.0') + new_v = v + if 'proj.' in new_k: + new_k = new_k.replace('proj.', 'projection.') + # transformer encoder layer conversion + elif k.startswith('block'): + stage_i = int(k.split('.')[0].replace('block', '')) + new_k = k.replace(f'block{stage_i}', f'layers.{stage_i-1}.1') + new_v = v + if 'attn.q.' in new_k: + sub_item_k = k.replace('q.', 'kv.') + new_k = new_k.replace('q.', 'attn.in_proj_') + new_v = torch.cat([v, ckpt[sub_item_k]], dim=0) + elif 'attn.kv.' in new_k: + continue + elif 'attn.proj.' in new_k: + new_k = new_k.replace('proj.', 'attn.out_proj.') + elif 'attn.sr.' in new_k: + new_k = new_k.replace('sr.', 'sr.') + elif 'mlp.' 
in new_k: + string = f'{new_k}-' + new_k = new_k.replace('mlp.', 'ffn.layers.') + if 'fc1.weight' in new_k or 'fc2.weight' in new_k: + new_v = v.reshape((*v.shape, 1, 1)) + new_k = new_k.replace('fc1.', '0.') + new_k = new_k.replace('dwconv.dwconv.', '1.') + new_k = new_k.replace('fc2.', '4.') + string += f'{new_k} {v.shape}-{new_v.shape}' + # norm layer conversion + elif k.startswith('norm'): + stage_i = int(k.split('.')[0].replace('norm', '')) + new_k = k.replace(f'norm{stage_i}', f'layers.{stage_i-1}.2') + new_v = v + else: + new_k = k + new_v = v + new_ckpt[new_k] = new_v + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in official pretrained segformer to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_mit(state_dict) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/san2mmseg.py b/tools/model_converters/san2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..301a46608e0f14df17138922ae3a747aee105372 --- /dev/null +++ b/tools/model_converters/san2mmseg.py @@ -0,0 +1,220 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_key_name(ckpt): + new_ckpt = OrderedDict() + + for k, v in ckpt.items(): + key_list = k.split('.') + if key_list[0] == 'clip_visual_extractor': + new_transform_name = 'image_encoder' + if key_list[1] == 'class_embedding': + new_name = '.'.join([new_transform_name, 'cls_token']) + elif key_list[1] == 'positional_embedding': + new_name = '.'.join([new_transform_name, 'pos_embed']) + elif key_list[1] == 'conv1': + new_name = '.'.join([ + new_transform_name, 'patch_embed.projection', key_list[2] + ]) + elif key_list[1] == 'ln_pre': + new_name = '.'.join( + [new_transform_name, key_list[1], key_list[2]]) + elif key_list[1] == 'resblocks': + new_layer_name = 'layers' + layer_index = key_list[2] + paras = key_list[3:] + if paras[0] == 'ln_1': + new_para_name = '.'.join(['ln1'] + key_list[4:]) + elif paras[0] == 'attn': + new_para_name = '.'.join(['attn.attn'] + key_list[4:]) + elif paras[0] == 'ln_2': + new_para_name = '.'.join(['ln2'] + key_list[4:]) + elif paras[0] == 'mlp': + if paras[1] == 'c_fc': + new_para_name = '.'.join(['ffn.layers.0.0'] + + key_list[-1:]) + else: + new_para_name = '.'.join(['ffn.layers.1'] + + key_list[-1:]) + new_name = '.'.join([ + new_transform_name, new_layer_name, layer_index, + new_para_name + ]) + elif key_list[0] == 'side_adapter_network': + decode_head_name = 'decode_head' + module_name = 'side_adapter_network' + if key_list[1] == 'vit_model': + if key_list[2] == 'blocks': + layer_name = 'encode_layers' + layer_index = key_list[3] + paras = key_list[4:] + if paras[0] == 'norm1': + new_para_name = '.'.join(['ln1'] + key_list[5:]) + elif paras[0] == 'attn': + new_para_name = '.'.join(key_list[4:]) + new_para_name = new_para_name.replace( + 'attn.qkv.', 
'attn.attn.in_proj_') + new_para_name = new_para_name.replace( + 'attn.proj', 'attn.attn.out_proj') + elif paras[0] == 'norm2': + new_para_name = '.'.join(['ln2'] + key_list[5:]) + elif paras[0] == 'mlp': + new_para_name = '.'.join(['ffn'] + key_list[5:]) + new_para_name = new_para_name.replace( + 'fc1', 'layers.0.0') + new_para_name = new_para_name.replace( + 'fc2', 'layers.1') + else: + print(f'Wrong for {k}') + new_name = '.'.join([ + decode_head_name, module_name, layer_name, layer_index, + new_para_name + ]) + elif key_list[2] == 'pos_embed': + new_name = '.'.join( + [decode_head_name, module_name, 'pos_embed']) + elif key_list[2] == 'patch_embed': + new_name = '.'.join([ + decode_head_name, module_name, 'patch_embed', + 'projection', key_list[4] + ]) + else: + print(f'Wrong for {k}') + elif key_list[1] == 'query_embed' or key_list[ + 1] == 'query_pos_embed': + new_name = '.'.join( + [decode_head_name, module_name, key_list[1]]) + elif key_list[1] == 'fusion_layers': + layer_name = 'conv_clips' + layer_index = key_list[2][-1] + paras = '.'.join(key_list[3:]) + new_para_name = paras.replace('input_proj.0', '0') + new_para_name = new_para_name.replace('input_proj.1', '1.conv') + new_name = '.'.join([ + decode_head_name, module_name, layer_name, layer_index, + new_para_name + ]) + elif key_list[1] == 'mask_decoder': + new_name = 'decode_head.' + k + else: + print(f'Wrong for {k}') + elif key_list[0] == 'clip_rec_head': + module_name = 'rec_with_attnbias' + if key_list[1] == 'proj': + new_name = '.'.join( + [decode_head_name, module_name, 'proj.weight']) + elif key_list[1] == 'ln_post': + new_name = '.'.join( + [decode_head_name, module_name, 'ln_post', key_list[2]]) + elif key_list[1] == 'resblocks': + new_layer_name = 'layers' + layer_index = key_list[2] + paras = key_list[3:] + if paras[0] == 'ln_1': + new_para_name = '.'.join(['norms.0'] + paras[1:]) + elif paras[0] == 'attn': + new_para_name = '.'.join(['attentions.0.attn'] + paras[1:]) + elif paras[0] == 'ln_2': + new_para_name = '.'.join(['norms.1'] + paras[1:]) + elif paras[0] == 'mlp': + if paras[1] == 'c_fc': + new_para_name = '.'.join(['ffns.0.layers.0.0'] + + paras[2:]) + elif paras[1] == 'c_proj': + new_para_name = '.'.join(['ffns.0.layers.1'] + + paras[2:]) + else: + print(f'Wrong for {k}') + new_name = '.'.join([ + decode_head_name, module_name, new_layer_name, layer_index, + new_para_name + ]) + else: + print(f'Wrong for {k}') + elif key_list[0] == 'ov_classifier': + text_encoder_name = 'text_encoder' + if key_list[1] == 'transformer': + layer_name = 'transformer' + layer_index = key_list[3] + paras = key_list[4:] + if paras[0] == 'attn': + new_para_name = '.'.join(['attentions.0.attn'] + paras[1:]) + elif paras[0] == 'ln_1': + new_para_name = '.'.join(['norms.0'] + paras[1:]) + elif paras[0] == 'ln_2': + new_para_name = '.'.join(['norms.1'] + paras[1:]) + elif paras[0] == 'mlp': + if paras[1] == 'c_fc': + new_para_name = '.'.join(['ffns.0.layers.0.0'] + + paras[2:]) + elif paras[1] == 'c_proj': + new_para_name = '.'.join(['ffns.0.layers.1'] + + paras[2:]) + else: + print(f'Wrong for {k}') + else: + print(f'Wrong for {k}') + new_name = '.'.join([ + text_encoder_name, layer_name, layer_index, new_para_name + ]) + elif key_list[1] in [ + 'positional_embedding', 'text_projection', 'bg_embed', + 'attn_mask', 'logit_scale', 'token_embedding', 'ln_final' + ]: + new_name = k.replace('ov_classifier', 'text_encoder') + else: + print(f'Wrong for {k}') + elif key_list[0] == 'criterion': + new_name = k + else: + print(f'Wrong for 
{k}') + new_ckpt[new_name] = v + return new_ckpt + + +def convert_tensor(ckpt): + cls_token = ckpt['image_encoder.cls_token'] + new_cls_token = cls_token.unsqueeze(0).unsqueeze(0) + ckpt['image_encoder.cls_token'] = new_cls_token + pos_embed = ckpt['image_encoder.pos_embed'] + new_pos_embed = pos_embed.unsqueeze(0) + ckpt['image_encoder.pos_embed'] = new_pos_embed + proj_weight = ckpt['decode_head.rec_with_attnbias.proj.weight'] + new_proj_weight = proj_weight.transpose(1, 0) + ckpt['decode_head.rec_with_attnbias.proj.weight'] = new_proj_weight + return ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in timm pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + # timm checkpoint + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + # deit checkpoint + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_key_name(state_dict) + weight = convert_tensor(weight) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/stdc2mmseg.py b/tools/model_converters/stdc2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..6ea3b8342f546692f50a8e3c0b740f881058229c --- /dev/null +++ b/tools/model_converters/stdc2mmseg.py @@ -0,0 +1,71 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_stdc(ckpt, stdc_type): + new_state_dict = {} + if stdc_type == 'STDC1': + stage_lst = ['0', '1', '2.0', '2.1', '3.0', '3.1', '4.0', '4.1'] + else: + stage_lst = [ + '0', '1', '2.0', '2.1', '2.2', '2.3', '3.0', '3.1', '3.2', '3.3', + '3.4', '4.0', '4.1', '4.2' + ] + for k, v in ckpt.items(): + ori_k = k + flag = False + if 'cp.' in k: + k = k.replace('cp.', '') + if 'features.' in k: + num_layer = int(k.split('.')[1]) + feature_key_lst = 'features.' + str(num_layer) + '.' + stages_key_lst = 'stages.' + stage_lst[num_layer] + '.' + k = k.replace(feature_key_lst, stages_key_lst) + flag = True + if 'conv_list' in k: + k = k.replace('conv_list', 'layers') + flag = True + if 'avd_layer.' in k: + if 'avd_layer.0' in k: + k = k.replace('avd_layer.0', 'downsample.conv') + elif 'avd_layer.1' in k: + k = k.replace('avd_layer.1', 'downsample.bn') + flag = True + if flag: + new_state_dict[k] = ckpt[ori_k] + + return new_state_dict + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in official pretrained STDC1/2 to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + parser.add_argument('type', help='model type: STDC1 or STDC2') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + + assert args.type in ['STDC1', + 'STDC2'], 'STD type should be STDC1 or STDC2!' 
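+    # convert_stdc() renames the official keys to MMSegmentation style, e.g. an
+    # (illustrative) 'cp.features.2.conv_list.0.conv.weight' becomes
+    # 'stages.2.0.layers.0.conv.weight' under the STDC1 stage list above.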
+ weight = convert_stdc(state_dict, args.type) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/swin2mmseg.py b/tools/model_converters/swin2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..d434f9465bbdad6bebc7d5962e8bfaf63c7c9e72 --- /dev/null +++ b/tools/model_converters/swin2mmseg.py @@ -0,0 +1,87 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_swin(ckpt): + new_ckpt = OrderedDict() + + def correct_unfold_reduction_order(x): + out_channel, in_channel = x.shape + x = x.reshape(out_channel, 4, in_channel // 4) + x = x[:, [0, 2, 1, 3], :].transpose(1, + 2).reshape(out_channel, in_channel) + return x + + def correct_unfold_norm_order(x): + in_channel = x.shape[0] + x = x.reshape(4, in_channel // 4) + x = x[[0, 2, 1, 3], :].transpose(0, 1).reshape(in_channel) + return x + + for k, v in ckpt.items(): + if k.startswith('head'): + continue + elif k.startswith('layers'): + new_v = v + if 'attn.' in k: + new_k = k.replace('attn.', 'attn.w_msa.') + elif 'mlp.' in k: + if 'mlp.fc1.' in k: + new_k = k.replace('mlp.fc1.', 'ffn.layers.0.0.') + elif 'mlp.fc2.' in k: + new_k = k.replace('mlp.fc2.', 'ffn.layers.1.') + else: + new_k = k.replace('mlp.', 'ffn.') + elif 'downsample' in k: + new_k = k + if 'reduction.' in k: + new_v = correct_unfold_reduction_order(v) + elif 'norm.' in k: + new_v = correct_unfold_norm_order(v) + else: + new_k = k + new_k = new_k.replace('layers', 'stages', 1) + elif k.startswith('patch_embed'): + new_v = v + if 'proj' in k: + new_k = k.replace('proj', 'projection') + else: + new_k = k + else: + new_v = v + new_k = k + + new_ckpt[new_k] = new_v + + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in official pretrained swin models to' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_swin(state_dict) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/twins2mmseg.py b/tools/model_converters/twins2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..647d41784aa07468be4b3f2e183064ad55266ad1 --- /dev/null +++ b/tools/model_converters/twins2mmseg.py @@ -0,0 +1,87 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_twins(args, ckpt): + + new_ckpt = OrderedDict() + + for k, v in list(ckpt.items()): + new_v = v + if k.startswith('head'): + continue + elif k.startswith('patch_embeds'): + if 'proj.' in k: + new_k = k.replace('proj.', 'projection.') + else: + new_k = k + elif k.startswith('blocks'): + # Union + if 'attn.q.' 
in k: + new_k = k.replace('q.', 'attn.in_proj_') + new_v = torch.cat([v, ckpt[k.replace('attn.q.', 'attn.kv.')]], + dim=0) + elif 'mlp.fc1' in k: + new_k = k.replace('mlp.fc1', 'ffn.layers.0.0') + elif 'mlp.fc2' in k: + new_k = k.replace('mlp.fc2', 'ffn.layers.1') + # Only pcpvt + elif args.model == 'pcpvt': + if 'attn.proj.' in k: + new_k = k.replace('proj.', 'attn.out_proj.') + else: + new_k = k + + # Only svt + else: + if 'attn.proj.' in k: + k_lst = k.split('.') + if int(k_lst[2]) % 2 == 1: + new_k = k.replace('proj.', 'attn.out_proj.') + else: + new_k = k + else: + new_k = k + new_k = new_k.replace('blocks.', 'layers.') + elif k.startswith('pos_block'): + new_k = k.replace('pos_block', 'position_encodings') + if 'proj.0.' in new_k: + new_k = new_k.replace('proj.0.', 'proj.') + else: + new_k = k + if 'attn.kv.' not in k: + new_ckpt[new_k] = new_v + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in timm pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + parser.add_argument('model', help='model: pcpvt or svt') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + + if 'state_dict' in checkpoint: + # timm checkpoint + state_dict = checkpoint['state_dict'] + else: + state_dict = checkpoint + + weight = convert_twins(args, state_dict) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/vit2mmseg.py b/tools/model_converters/vit2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..1d1f8a427e232290c6dcf490e33f777275dd238a --- /dev/null +++ b/tools/model_converters/vit2mmseg.py @@ -0,0 +1,70 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp +from collections import OrderedDict + +import mmengine +import torch +from mmengine.runner import CheckpointLoader + + +def convert_vit(ckpt): + + new_ckpt = OrderedDict() + + for k, v in ckpt.items(): + if k.startswith('head'): + continue + if k.startswith('norm'): + new_k = k.replace('norm.', 'ln1.') + elif k.startswith('patch_embed'): + if 'proj' in k: + new_k = k.replace('proj', 'projection') + else: + new_k = k + elif k.startswith('blocks'): + if 'norm' in k: + new_k = k.replace('norm', 'ln') + elif 'mlp.fc1' in k: + new_k = k.replace('mlp.fc1', 'ffn.layers.0.0') + elif 'mlp.fc2' in k: + new_k = k.replace('mlp.fc2', 'ffn.layers.1') + elif 'attn.qkv' in k: + new_k = k.replace('attn.qkv.', 'attn.attn.in_proj_') + elif 'attn.proj' in k: + new_k = k.replace('attn.proj', 'attn.attn.out_proj') + else: + new_k = k + new_k = new_k.replace('blocks.', 'layers.') + else: + new_k = k + new_ckpt[new_k] = v + + return new_ckpt + + +def main(): + parser = argparse.ArgumentParser( + description='Convert keys in timm pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. 
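+    # Example (illustrative paths, not shipped with this repo):
+    #   python tools/model_converters/vit2mmseg.py vit_base_p16_timm.pth pretrain/vit_base_p16_mmseg.pth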
+ parser.add_argument('dst', help='save path') + args = parser.parse_args() + + checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu') + if 'state_dict' in checkpoint: + # timm checkpoint + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + # deit checkpoint + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + weight = convert_vit(state_dict) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(weight, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/model_converters/vitjax2mmseg.py b/tools/model_converters/vitjax2mmseg.py new file mode 100644 index 0000000000000000000000000000000000000000..81bc2ea020e32d086fc4ce2153cc2bf51edd4d48 --- /dev/null +++ b/tools/model_converters/vitjax2mmseg.py @@ -0,0 +1,123 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os.path as osp + +import mmengine +import numpy as np +import torch + + +def vit_jax_to_torch(jax_weights, num_layer=12): + torch_weights = dict() + + # patch embedding + conv_filters = jax_weights['embedding/kernel'] + conv_filters = conv_filters.permute(3, 2, 0, 1) + torch_weights['patch_embed.projection.weight'] = conv_filters + torch_weights['patch_embed.projection.bias'] = jax_weights[ + 'embedding/bias'] + + # pos embedding + torch_weights['pos_embed'] = jax_weights[ + 'Transformer/posembed_input/pos_embedding'] + + # cls token + torch_weights['cls_token'] = jax_weights['cls'] + + # head + torch_weights['ln1.weight'] = jax_weights['Transformer/encoder_norm/scale'] + torch_weights['ln1.bias'] = jax_weights['Transformer/encoder_norm/bias'] + + # transformer blocks + for i in range(num_layer): + jax_block = f'Transformer/encoderblock_{i}' + torch_block = f'layers.{i}' + + # attention norm + torch_weights[f'{torch_block}.ln1.weight'] = jax_weights[ + f'{jax_block}/LayerNorm_0/scale'] + torch_weights[f'{torch_block}.ln1.bias'] = jax_weights[ + f'{jax_block}/LayerNorm_0/bias'] + + # attention + query_weight = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/query/kernel'] + query_bias = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/query/bias'] + key_weight = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/key/kernel'] + key_bias = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/key/bias'] + value_weight = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/value/kernel'] + value_bias = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/value/bias'] + + qkv_weight = torch.from_numpy( + np.stack((query_weight, key_weight, value_weight), 1)) + qkv_weight = torch.flatten(qkv_weight, start_dim=1) + qkv_bias = torch.from_numpy( + np.stack((query_bias, key_bias, value_bias), 0)) + qkv_bias = torch.flatten(qkv_bias, start_dim=0) + + torch_weights[f'{torch_block}.attn.attn.in_proj_weight'] = qkv_weight + torch_weights[f'{torch_block}.attn.attn.in_proj_bias'] = qkv_bias + to_out_weight = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/out/kernel'] + to_out_weight = torch.flatten(to_out_weight, start_dim=0, end_dim=1) + torch_weights[ + f'{torch_block}.attn.attn.out_proj.weight'] = to_out_weight + torch_weights[f'{torch_block}.attn.attn.out_proj.bias'] = jax_weights[ + f'{jax_block}/MultiHeadDotProductAttention_1/out/bias'] + + # mlp norm + torch_weights[f'{torch_block}.ln2.weight'] = jax_weights[ + f'{jax_block}/LayerNorm_2/scale'] + torch_weights[f'{torch_block}.ln2.bias'] = jax_weights[ + f'{jax_block}/LayerNorm_2/bias'] + + # mlp + 
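+        # MlpBlock_3 Dense_0/Dense_1 kernels map onto ffn.layers.0.0 / ffn.layers.1;
+        # jax stores dense kernels as [in_features, out_features], so these 2-D
+        # weights are transposed to torch's [out, in] layout in the loop below.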
torch_weights[f'{torch_block}.ffn.layers.0.0.weight'] = jax_weights[ + f'{jax_block}/MlpBlock_3/Dense_0/kernel'] + torch_weights[f'{torch_block}.ffn.layers.0.0.bias'] = jax_weights[ + f'{jax_block}/MlpBlock_3/Dense_0/bias'] + torch_weights[f'{torch_block}.ffn.layers.1.weight'] = jax_weights[ + f'{jax_block}/MlpBlock_3/Dense_1/kernel'] + torch_weights[f'{torch_block}.ffn.layers.1.bias'] = jax_weights[ + f'{jax_block}/MlpBlock_3/Dense_1/bias'] + + # transpose weights + for k, v in torch_weights.items(): + if 'weight' in k and 'patch_embed' not in k and 'ln' not in k: + v = v.permute(1, 0) + torch_weights[k] = v + + return torch_weights + + +def main(): + # stole refactoring code from Robin Strudel, thanks + parser = argparse.ArgumentParser( + description='Convert keys from jax official pretrained vit models to ' + 'MMSegmentation style.') + parser.add_argument('src', help='src model path or url') + # The dst path must be a full path of the new checkpoint. + parser.add_argument('dst', help='save path') + args = parser.parse_args() + + jax_weights = np.load(args.src) + jax_weights_tensor = {} + for key in jax_weights.files: + value = torch.from_numpy(jax_weights[key]) + jax_weights_tensor[key] = value + if 'L_16-i21k' in args.src: + num_layer = 24 + else: + num_layer = 12 + torch_weights = vit_jax_to_torch(jax_weights_tensor, num_layer) + mmengine.mkdir_or_exist(osp.dirname(args.dst)) + torch.save(torch_weights, args.dst) + + +if __name__ == '__main__': + main() diff --git a/tools/slurm_test.sh b/tools/slurm_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..4e6f7bf4e33267f269cf0f455924cb70166ccd4b --- /dev/null +++ b/tools/slurm_test.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -x + +PARTITION=$1 +JOB_NAME=$2 +CONFIG=$3 +CHECKPOINT=$4 +GPUS=${GPUS:-4} +GPUS_PER_NODE=${GPUS_PER_NODE:-4} +CPUS_PER_TASK=${CPUS_PER_TASK:-5} +PY_ARGS=${@:5} +SRUN_ARGS=${SRUN_ARGS:-""} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +srun -p ${PARTITION} \ + --job-name=${JOB_NAME} \ + --gres=gpu:${GPUS_PER_NODE} \ + --ntasks=${GPUS} \ + --ntasks-per-node=${GPUS_PER_NODE} \ + --cpus-per-task=${CPUS_PER_TASK} \ + --kill-on-bad-exit=1 \ + ${SRUN_ARGS} \ + python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS} diff --git a/tools/slurm_train.sh b/tools/slurm_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..ab232105f0309c720ed81a522eca14b6fbd64afd --- /dev/null +++ b/tools/slurm_train.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +set -x + +PARTITION=$1 +JOB_NAME=$2 +CONFIG=$3 +GPUS=${GPUS:-4} +GPUS_PER_NODE=${GPUS_PER_NODE:-4} +CPUS_PER_TASK=${CPUS_PER_TASK:-5} +SRUN_ARGS=${SRUN_ARGS:-""} +PY_ARGS=${@:4} + +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ +srun -p ${PARTITION} \ + --job-name=${JOB_NAME} \ + --gres=gpu:${GPUS_PER_NODE} \ + --ntasks=${GPUS} \ + --ntasks-per-node=${GPUS_PER_NODE} \ + --cpus-per-task=${CPUS_PER_TASK} \ + --kill-on-bad-exit=1 \ + ${SRUN_ARGS} \ + python -u tools/train.py ${CONFIG} --launcher="slurm" ${PY_ARGS} diff --git a/tools/torchserve/mmseg2torchserve.py b/tools/torchserve/mmseg2torchserve.py new file mode 100644 index 0000000000000000000000000000000000000000..23f99638e799fd0b37a6737cc833dd7d24f611f8 --- /dev/null +++ b/tools/torchserve/mmseg2torchserve.py @@ -0,0 +1,112 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
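+# Packs an MMSegmentation config + checkpoint into a TorchServe `.mar` archive
+# via `torch-model-archiver`. Example invocation (paths and model name are
+# illustrative assumptions):
+#   python tools/torchserve/mmseg2torchserve.py path/to/config.py path/to/ckpt.pth \
+#       --output-folder serve --model-name my_segmentor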
+from argparse import ArgumentParser, Namespace +from pathlib import Path +from tempfile import TemporaryDirectory + +from mmengine import Config +from mmengine.utils import mkdir_or_exist + +try: + from model_archiver.model_packaging import package_model + from model_archiver.model_packaging_utils import ModelExportUtils +except ImportError: + package_model = None + + +def mmseg2torchserve( + config_file: str, + checkpoint_file: str, + output_folder: str, + model_name: str, + model_version: str = '1.0', + force: bool = False, +): + """Converts mmsegmentation model (config + checkpoint) to TorchServe + `.mar`. + + Args: + config_file: + In MMSegmentation config format. + The contents vary for each task repository. + checkpoint_file: + In MMSegmentation checkpoint format. + The contents vary for each task repository. + output_folder: + Folder where `{model_name}.mar` will be created. + The file created will be in TorchServe archive format. + model_name: + If not None, used for naming the `{model_name}.mar` file + that will be created under `output_folder`. + If None, `{Path(checkpoint_file).stem}` will be used. + model_version: + Model's version. + force: + If True, if there is an existing `{model_name}.mar` + file under `output_folder` it will be overwritten. + """ + mkdir_or_exist(output_folder) + + config = Config.fromfile(config_file) + + with TemporaryDirectory() as tmpdir: + config.dump(f'{tmpdir}/config.py') + + args = Namespace( + **{ + 'model_file': f'{tmpdir}/config.py', + 'serialized_file': checkpoint_file, + 'handler': f'{Path(__file__).parent}/mmseg_handler.py', + 'model_name': model_name or Path(checkpoint_file).stem, + 'version': model_version, + 'export_path': output_folder, + 'force': force, + 'requirements_file': None, + 'extra_files': None, + 'runtime': 'python', + 'archive_format': 'default' + }) + manifest = ModelExportUtils.generate_manifest_json(args) + package_model(args, manifest) + + +def parse_args(): + parser = ArgumentParser( + description='Convert mmseg models to TorchServe `.mar` format.') + parser.add_argument('config', type=str, help='config file path') + parser.add_argument('checkpoint', type=str, help='checkpoint file path') + parser.add_argument( + '--output-folder', + type=str, + required=True, + help='Folder where `{model_name}.mar` will be created.') + parser.add_argument( + '--model-name', + type=str, + default=None, + help='If not None, used for naming the `{model_name}.mar`' + 'file that will be created under `output_folder`.' + 'If None, `{Path(checkpoint_file).stem}` will be used.') + parser.add_argument( + '--model-version', + type=str, + default='1.0', + help='Number used for versioning.') + parser.add_argument( + '-f', + '--force', + action='store_true', + help='overwrite the existing `{model_name}.mar`') + args = parser.parse_args() + + return args + + +if __name__ == '__main__': + args = parse_args() + + if package_model is None: + raise ImportError('`torch-model-archiver` is required.' + 'Try: pip install torch-model-archiver') + + mmseg2torchserve(args.config, args.checkpoint, args.output_folder, + args.model_name, args.model_version, args.force) diff --git a/tools/torchserve/mmseg_handler.py b/tools/torchserve/mmseg_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..dbe5ded8482c1113a6adb45a22b650af71f6294e --- /dev/null +++ b/tools/torchserve/mmseg_handler.py @@ -0,0 +1,56 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
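+# TorchServe handler: decodes each request image (raw bytes or a base64 string),
+# runs `inference_model` on it, and returns the prediction PNG-encoded.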
+import base64 +import os + +import cv2 +import mmcv +import torch +from mmengine.model.utils import revert_sync_batchnorm +from ts.torch_handler.base_handler import BaseHandler + +from mmseg.apis import inference_model, init_model + + +class MMsegHandler(BaseHandler): + + def initialize(self, context): + properties = context.system_properties + self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu' + self.device = torch.device(self.map_location + ':' + + str(properties.get('gpu_id')) if torch.cuda. + is_available() else self.map_location) + self.manifest = context.manifest + + model_dir = properties.get('model_dir') + serialized_file = self.manifest['model']['serializedFile'] + checkpoint = os.path.join(model_dir, serialized_file) + self.config_file = os.path.join(model_dir, 'config.py') + + self.model = init_model(self.config_file, checkpoint, self.device) + self.model = revert_sync_batchnorm(self.model) + self.initialized = True + + def preprocess(self, data): + images = [] + + for row in data: + image = row.get('data') or row.get('body') + if isinstance(image, str): + image = base64.b64decode(image) + image = mmcv.imfrombytes(image) + images.append(image) + + return images + + def inference(self, data, *args, **kwargs): + results = [inference_model(self.model, img) for img in data] + return results + + def postprocess(self, data): + output = [] + + for image_result in data: + _, buffer = cv2.imencode('.png', image_result[0].astype('uint8')) + content = buffer.tobytes() + output.append(content) + return output diff --git a/tools/torchserve/test_torchserve.py b/tools/torchserve/test_torchserve.py new file mode 100644 index 0000000000000000000000000000000000000000..b015b6658556e5045af2daf5d998de0de61e1f6b --- /dev/null +++ b/tools/torchserve/test_torchserve.py @@ -0,0 +1,58 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
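+# Sanity-checks a TorchServe deployment against local PyTorch inference for a
+# single image. Example (illustrative values):
+#   python tools/torchserve/test_torchserve.py demo.png config.py ckpt.pth my_segmentor \
+#       --inference-addr 127.0.0.1:8080 --result-image result.png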
+from argparse import ArgumentParser +from io import BytesIO + +import matplotlib.pyplot as plt +import mmcv +import requests + +from mmseg.apis import inference_model, init_model + + +def parse_args(): + parser = ArgumentParser( + description='Compare result of torchserve and pytorch,' + 'and visualize them.') + parser.add_argument('img', help='Image file') + parser.add_argument('config', help='Config file') + parser.add_argument('checkpoint', help='Checkpoint file') + parser.add_argument('model_name', help='The model name in the server') + parser.add_argument( + '--inference-addr', + default='127.0.0.1:8080', + help='Address and port of the inference server') + parser.add_argument( + '--result-image', + type=str, + default=None, + help='save server output in result-image') + parser.add_argument( + '--device', default='cuda:0', help='Device used for inference') + + args = parser.parse_args() + return args + + +def main(args): + url = 'http://' + args.inference_addr + '/predictions/' + args.model_name + with open(args.img, 'rb') as image: + tmp_res = requests.post(url, image) + content = tmp_res.content + if args.result_image: + with open(args.result_image, 'wb') as out_image: + out_image.write(content) + plt.imshow(mmcv.imread(args.result_image, 'grayscale')) + plt.show() + else: + plt.imshow(plt.imread(BytesIO(content))) + plt.show() + model = init_model(args.config, args.checkpoint, args.device) + image = mmcv.imread(args.img) + result = inference_model(model, image) + plt.imshow(result[0]) + plt.show() + + +if __name__ == '__main__': + args = parse_args() + main(args) diff --git a/tools/train.py b/tools/train.py new file mode 100644 index 0000000000000000000000000000000000000000..21e0298ffd11f6b0d89733f8f1335127f1259d23 --- /dev/null +++ b/tools/train.py @@ -0,0 +1,104 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import logging +import os +import os.path as osp +from vegseg.datasets import GrassDataset +from mmengine.config import Config, DictAction +from mmengine.logging import print_log +from mmengine.runner import Runner + +from mmseg.registry import RUNNERS +from ktda import models + +def parse_args(): + parser = argparse.ArgumentParser(description='Train a segmentor') + parser.add_argument('config', help='train config file path') + parser.add_argument('--work-dir', help='the dir to save logs and models') + parser.add_argument( + '--resume', + action='store_true', + default=False, + help='resume from the latest checkpoint in the work_dir automatically') + parser.add_argument( + '--amp', + action='store_true', + default=False, + help='enable automatic-mixed-precision training') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + # When using PyTorch version >= 2.0.0, the `torch.distributed.launch` + # will pass the `--local-rank` parameter to `tools/train.py` instead + # of `--local_rank`. 
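+    # Registering both spellings below keeps the script compatible with either
+    # launcher convention.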
+ parser.add_argument('--local_rank', '--local-rank', type=int, default=0) + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + + return args + + +def main(): + args = parse_args() + + # load config + cfg = Config.fromfile(args.config) + cfg.launcher = args.launcher + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # work_dir is determined in this priority: CLI > segment in file > filename + if args.work_dir is not None: + # update configs according to CLI args if args.work_dir is not None + cfg.work_dir = args.work_dir + elif cfg.get('work_dir', None) is None: + # use config filename as default work_dir if cfg.work_dir is None + cfg.work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + + # enable automatic-mixed-precision training + if args.amp is True: + optim_wrapper = cfg.optim_wrapper.type + if optim_wrapper == 'AmpOptimWrapper': + print_log( + 'AMP training is already enabled in your config.', + logger='current', + level=logging.WARNING) + else: + assert optim_wrapper == 'OptimWrapper', ( + '`--amp` is only supported when the optimizer wrapper type is ' + f'`OptimWrapper` but got {optim_wrapper}.') + cfg.optim_wrapper.type = 'AmpOptimWrapper' + cfg.optim_wrapper.loss_scale = 'dynamic' + + # resume training + cfg.resume = args.resume + + # build the runner from config + if 'runner_type' not in cfg: + # build the default runner + runner = Runner.from_cfg(cfg) + else: + # build customized runner from the registry + # if 'runner_type' is set in the cfg + runner = RUNNERS.build(cfg) + + # start training + runner.train() + + +if __name__ == '__main__': + main() diff --git a/tools/vis_cloud.py b/tools/vis_cloud.py new file mode 100644 index 0000000000000000000000000000000000000000..0d6df9e8444c3f44439ed2ac9d813b35c2188c90 --- /dev/null +++ b/tools/vis_cloud.py @@ -0,0 +1,57 @@ +from mmseg.apis import MMSegInferencer +from glob import glob +from vegseg.datasets import L8BIOMEDataset +import numpy as np +from typing import List +import os +from PIL import Image +from vegseg import models + +def get_palette() -> List[int]: + """ + get palette of dataset. + return: + palette: list of palette. + """ + palette = [] + palette_list = L8BIOMEDataset.METAINFO["palette"] + for palette_item in palette_list: + palette.extend(palette_item) + return palette + + +def give_color_to_mask( + mask: Image.Image | np.ndarray, palette: List[int] +) -> Image.Image: + """ + give color to mask. + return: + color_mask: color mask. 
+ """ + color_mask = Image.fromarray(mask).convert("P") + color_mask.putpalette(palette) + return color_mask + + +def main(): + config_path = "work_dirs/experiment_p_l8/experiment_p_l8.py" + weight_path = "work_dirs/experiment_p_l8/best_mIoU_iter_20000.pth" + inference = MMSegInferencer( + model=config_path, + weights=weight_path, + device="cuda:1", + classes=L8BIOMEDataset.METAINFO["classes"], + palette=L8BIOMEDataset.METAINFO["palette"], + ) + images = glob("data/vis/input/*.png") + palette = get_palette() + predictions = inference.__call__(images,batch_size=16)["predictions"] + for image_path, prediction in zip(images, predictions): + filename = os.path.basename(image_path) + filename = os.path.join("data/vis/ktda",filename) + prediction = prediction.astype(np.uint8) + color_mask = give_color_to_mask(prediction, palette=palette) + color_mask.save(filename) + +if __name__ == "__main__": + main() diff --git "a/tools/vis_l8_\347\273\204\345\220\210.py" "b/tools/vis_l8_\347\273\204\345\220\210.py" new file mode 100644 index 0000000000000000000000000000000000000000..5a083045cda17eed401dbc296761b979c3e061cf --- /dev/null +++ "b/tools/vis_l8_\347\273\204\345\220\210.py" @@ -0,0 +1,119 @@ +from glob import glob +from mmeval import MeanIoU +from PIL import Image +import numpy as np +from typing import List +from vegseg.datasets import L8BIOMEDataset +from matplotlib import pyplot as plt +import os + +def give_color_to_mask( + mask: Image.Image | np.ndarray, palette: List[int] +) -> Image.Image: + """ + Args: + mask: mask to color, numpy array or PIL Image. + palette: palette of dataset. + return: + mask: mask with color. + """ + if isinstance(mask, np.ndarray): + mask = Image.fromarray(mask) + mask = mask.convert("P") + mask.putpalette(palette) + return mask + +def get_iou(pred: np.ndarray, gt: np.ndarray, num_classes=2): + pred = pred[np.newaxis] + gt = gt[np.newaxis] + miou = MeanIoU(num_classes=num_classes) + result = miou(pred, gt) + return result["mIoU"] * 100 + + +def get_palette() -> List[int]: + """ + get palette of dataset. + return: + palette: list of palette. 
+ """ + palette = [] + palette_list = L8BIOMEDataset.METAINFO["palette"] + for palette_item in palette_list: + palette.extend(palette_item) + return palette + +def main(): + ktda = glob("data/vis/ktda/*.png") + + all_images = [ + "cdnetv1", + "cdnetv2", + "hrcloudnet", + "input", + "kappamask", + "ktda", + "label", + "mcdnet", + "scnn", + "unetmobv2", + ] + model_order = [ + "ktda", + "cdnetv1", + "cdnetv2", + "hrcloudnet", + "kappamask", + "mcdnet", + "scnn", + "unetmobv2", + ] + palette = get_palette() + for ktda_path in ktda: + images_paths = [ + ktda_path.replace("ktda", filename) for filename in all_images + ] + model_name_mask = {} + model_iou = {} + label_path = ktda_path.replace("ktda", "label") + for image_path in images_paths: + model_name = image_path.split("/")[-2] + if model_name in ["input", "label"]: + continue + model_name_mask[model_name] = np.array(Image.open(image_path)) + model_iou[model_name] = get_iou( + model_name_mask[model_name], np.array(Image.open(label_path)),num_classes=4 + ) + result_iou_sorted = sorted(model_iou.items(), key=lambda x: x[1], reverse=True) + if result_iou_sorted[0][0] != "ktda": + continue + input_path = ktda_path.replace("ktda", "input") + + plt.figure(figsize=(32, 8)) + plt.subplots_adjust(wspace=0.01) + plt.subplot(1, 10, 1) + plt.imshow(Image.open(input_path)) + plt.axis("off") + + plt.subplot(1, 10, 2) + plt.imshow(give_color_to_mask(Image.open(label_path), palette=palette)) + plt.axis("off") + + for i, model_name in enumerate(model_order): + plt.subplot(1, 10, i + 3) + plt.imshow(give_color_to_mask(model_name_mask[model_name], palette)) + plt.axis("off") + base_name = os.path.basename(ktda_path).split(".")[0] + diff_iou = result_iou_sorted[0][1] - result_iou_sorted[1][1] + plt.savefig( + f"l8_vis/{diff_iou:.2f}_{base_name}.svg", + dpi=300, + bbox_inches="tight", + pad_inches=0, + ) + plt.close() + + + +if __name__ == "__main__": + main() diff --git a/tools/vis_model_plus.py b/tools/vis_model_plus.py new file mode 100644 index 0000000000000000000000000000000000000000..e1aaf5ca2129d56d7cb43d6a83ca13f1f4e5e97b --- /dev/null +++ b/tools/vis_model_plus.py @@ -0,0 +1,182 @@ +from glob import glob +import argparse +import os +from typing import Tuple, List +import numpy as np +from mmeval import MeanIoU +from PIL import Image +from matplotlib import pyplot as plt +from mmseg.apis import MMSegInferencer +from vegseg.datasets import GrassDataset +from vegseg import models + + +def get_iou(pred: np.ndarray, gt: np.ndarray, num_classes=2): + pred = pred[np.newaxis] + gt = gt[np.newaxis] + miou = MeanIoU(num_classes=num_classes) + result = miou(pred, gt) + return result["mIoU"] * 100 + + +def get_args() -> Tuple[str, str, int]: + """ + get args + return: + --device: device to use. + --dataset_path: dataset path. + --output_path: output path for saving. + """ + parser = argparse.ArgumentParser() + parser.add_argument("--device", type=str, default="cuda:4") + parser.add_argument("--dataset_path", type=str, default="data/grass") + args = parser.parse_args() + return args.device, args.dataset_path + + +def give_color_to_mask( + mask: Image.Image | np.ndarray, palette: List[int] +) -> Image.Image: + """ + Args: + mask: mask to color, numpy array or PIL Image. + palette: palette of dataset. + return: + mask: mask with color. 
+ """ + if isinstance(mask, np.ndarray): + mask = Image.fromarray(mask) + mask = mask.convert("P") + mask.putpalette(palette) + return mask + + +def get_image_and_mask_paths( + dataset_path: str, num: int +) -> Tuple[List[str], List[str]]: + """ + get image and mask paths from dataset path. + return: + image_paths: list of image paths. + mask_paths: list of mask paths. + """ + image_paths = glob(os.path.join(dataset_path, "img_dir", "*", "*.tif")) + if num != -1: + image_paths = image_paths[:num] + mask_paths = [ + filename.replace("tif", "png").replace("img_dir", "ann_dir") + for filename in image_paths + ] + return image_paths, mask_paths + + +def get_palette() -> List[int]: + """ + get palette of dataset. + return: + palette: list of palette. + """ + palette = [] + palette_list = GrassDataset.METAINFO["palette"] + for palette_item in palette_list: + palette.extend(palette_item) + return palette + + +def init_all_models(models_paths: List[str], device: str): + """ + init all models + Args: + models_path (str): path to all models. + device (str): device to use. + Return: + models (dict): dict of models. + """ + models = {} + for model_path in models_paths: + config_path = glob(os.path.join(model_path, "*.py"))[0] + weight_path = glob(os.path.join(model_path, "best_mIoU_iter_*.pth"))[0] + inference = MMSegInferencer( + config_path, + weight_path, + device=device, + classes=GrassDataset.METAINFO["classes"], + palette=GrassDataset.METAINFO["palette"], + ) + model_name = model_path.split(os.path.sep)[-1] + models[model_name] = inference + return models + + +def main(): + device, dataset_path = get_args() + image_paths, mask_paths = get_image_and_mask_paths(dataset_path, -1) + palette = get_palette() + models_paths = [ + r"work_dirs/fcn_r50", + r"work_dirs/pspnet_r101", + r"work_dirs/deeplabv3plus_r101", + r"work_dirs/unet-s5-d16_deeplabv3", + r"work_dirs/segformer_mit-b5", + r"work_dirs/mask2former_swin_b", + r"work_dirs/dinov2_upernet", + r"work_dirs/experiment_p", + ] + models = init_all_models(models_paths, device) + + model_order = [ + "experiment_p", + "fcn_r50", + "pspnet_r101", + "deeplabv3plus_r101", + "unet-s5-d16_deeplabv3", + "segformer_mit-b5", + "mask2former_swin_b", + "dinov2_upernet" + ] + + os.makedirs("vis_results", exist_ok=True) + for image_path, mask_path in zip(image_paths, mask_paths): + result_eval = {} + result_iou = {} + mask = Image.open(mask_path) + for model_name, inference in models.items(): + predictions: np.ndarray = inference(image_path)["predictions"] + predictions = predictions.astype(np.uint8) + result_eval[model_name] = predictions + result_iou[model_name] = get_iou(predictions, np.array(mask), num_classes=5) + + # 根据iou 进行排序 + result_iou_sorted = sorted(result_iou.items(), key=lambda x: x[1], reverse=True) + + if result_iou_sorted[0][0] != "experiment_p": + continue + + plt.figure(figsize=(32, 8)) + plt.subplots_adjust(wspace=0.01) + plt.subplot(1, 10, 1) + plt.imshow(Image.open(image_path)) + plt.axis("off") + + plt.subplot(1, 10, 2) + plt.imshow(give_color_to_mask(mask, palette=palette)) + plt.axis("off") + + for i, model_name in enumerate(model_order): + plt.subplot(1, 10, i + 3) + plt.imshow(give_color_to_mask(result_eval[model_name], palette)) + plt.axis("off") + + base_name = os.path.basename(image_path).split(".")[0] + diff_iou = result_iou_sorted[0][1] - result_iou_sorted[1][1] + plt.savefig( + f"vis_results/{diff_iou:.2f}_{base_name}.svg", + dpi=300, + bbox_inches="tight", + pad_inches=0, + ) + + +if __name__ == "__main__": + # example usage: 
+    # python tools/vis_model_plus.py --device cuda:0 --dataset_path data/grass
+    main()
diff --git "a/tools/\344\275\277\347\224\250\350\276\205\345\212\251\345\244\264\351\242\204\346\265\213.py" "b/tools/\344\275\277\347\224\250\350\276\205\345\212\251\345\244\264\351\242\204\346\265\213.py"
new file mode 100644
index 0000000000000000000000000000000000000000..242dc049933f10750b269877ee80a7dfe9c318ce
--- /dev/null
+++ "b/tools/\344\275\277\347\224\250\350\276\205\345\212\251\345\244\264\351\242\204\346\265\213.py"
@@ -0,0 +1,69 @@
+from mmseg.apis import init_model, inference_model
+from PIL import Image
+from vegseg.datasets import GrassDataset
+import numpy as np
+from typing import Tuple, List
+from glob import glob
+import torch
+from mmeval import MeanIoU
+from vegseg.models import DistillEncoderDecoder
+
+
+def get_iou(pred: np.ndarray, gt: np.ndarray, num_classes=5):
+    pred = pred[np.newaxis]
+    gt = gt[np.newaxis]
+    miou = MeanIoU(num_classes=num_classes)
+    result = miou(pred, gt)
+    return result["mIoU"] * 100
+
+
+def get_palette() -> List[int]:
+    """
+    get palette of dataset.
+    return:
+        palette: list of palette.
+    """
+    palette = []
+    palette_list = GrassDataset.METAINFO["palette"]
+    for palette_item in palette_list:
+        palette.extend(palette_item)
+    return palette
+
+
+# Run inference with the auxiliary head swapped in as the decode head and keep
+# the sample with the highest IoU among masks that contain more than three classes.
+config = "work_dirs/experiment_p/experiment_p.py"
+checkpoint = "work_dirs/experiment_p/best_mIoU_iter_22770.pth"
+device = "cuda:4"
+image_paths = glob("data/grass/img_dir/*/*.tif")
+batch_size = 64
+mask_paths = [filename.replace("img_dir", "ann_dir").replace(".tif", ".png") for filename in image_paths]
+model: DistillEncoderDecoder = init_model(config=config, checkpoint=checkpoint, device=device)
+model.decode_head = model.auxiliary_head
+model.eval()
+
+save_mask = None
+cur_iou = 0
+save_filename = None
+
+
+for i in range(0, len(image_paths), batch_size):
+    end_index = min(len(image_paths), i + batch_size)
+    image_paths_list = image_paths[i:end_index]
+    mask_paths_list = mask_paths[i:end_index]
+    results = inference_model(model, image_paths_list)
+    for mask_path, result in zip(mask_paths_list, results):
+        mask = np.array(Image.open(mask_path))
+        pred = result.pred_sem_seg.data.cpu().numpy()[0].astype(np.uint8)
+        iou = get_iou(pred, mask)
+        if iou > cur_iou and len(np.unique(mask).reshape(-1)) > 3:
+            cur_iou = iou
+            save_mask = pred
+            save_filename = mask_path
+
+
+auxiliary_img = Image.fromarray(save_mask).convert('P')
+palette = get_palette()
+auxiliary_img.putpalette(palette)
+auxiliary_img.save("auxiliary.png")
+print(save_filename)
+image_filename = save_filename.replace(".png", ".tif").replace("ann_dir", "img_dir")
+Image.open(image_filename).save("image.png")
+Image.open(save_filename).save("mask.png")
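+
+# Usage sketch (no CLI arguments): edit `config`, `checkpoint`, `device` and the
+# image glob above to point at a trained run, then execute this file with the
+# project root on PYTHONPATH; the paths above are examples from one experiment.
+# Outputs written to the working directory:
+#   auxiliary.png -- color-coded auxiliary-head prediction of the selected tile
+#   image.png     -- the corresponding input tile
+#   mask.png      -- its ground-truth annotation
+# The path of the selected ground-truth mask is also printed to stdout.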