XavierJiezou committed
Commit 918db92 · verified · Parent(s): 3a43a03

Add files using upload-large-folder tool

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
Files changed (50)
  1. configs/dinov2/dinov2_upernet_water.py +13 -0
  2. configs/ktda/dinov2_b_frozen-fam-fmm.py +18 -0
  3. configs/ktda/dinov2_b_frozen-fam.py +13 -0
  4. configs/ktda/experiment_a.py +14 -0
  5. configs/ktda/experiment_aa.py +46 -0
  6. configs/ktda/experiment_k.py +14 -0
  7. configs/ktda/experiment_u.py +15 -0
  8. configs/ktda/experiment_v.py +26 -0
  9. configs/ktda/ktda_grass.py +19 -0
  10. configs/pspnet/pspnet_r101_water.py +15 -0
  11. configs/pspnet/pspnet_r50.py +13 -0
  12. configs/segformer/segformer_mit-b0_water.py +14 -0
  13. ktda/datasets/__init__.py +7 -0
  14. ktda/datasets/grass.py +55 -0
  15. ktda/datasets/l8_biome.py +29 -0
  16. ktda/models/__init__.py +4 -0
  17. ktda/models/__pycache__/__init__.cpython-311.pyc +0 -0
  18. ktda/models/adapter/__init__.py +4 -0
  19. ktda/models/adapter/__pycache__/__init__.cpython-311.pyc +0 -0
  20. ktda/models/adapter/__pycache__/fam.cpython-311.pyc +0 -0
  21. ktda/models/adapter/__pycache__/fmm.cpython-311.pyc +0 -0
  22. ktda/models/adapter/fam.py +37 -0
  23. ktda/models/adapter/fmm.py +109 -0
  24. ktda/models/segmentors/__pycache__/__init__.cpython-311.pyc +0 -0
  25. ktda/models/segmentors/__pycache__/distill_encoder_decoder.cpython-311.pyc +0 -0
  26. ktda/models/segmentors/distill_encoder_decoder.py +382 -0
  27. requirements/docs.txt +7 -0
  28. requirements/optional.txt +22 -0
  29. requirements/runtime.txt +5 -0
  30. tools/analysis_tools/analyze_logs.py +130 -0
  31. tools/analysis_tools/benchmark.py +121 -0
  32. tools/analysis_tools/confusion_matrix.py +197 -0
  33. tools/analysis_tools/get_flops.py +126 -0
  34. tools/analysis_tools/visualization_cam.py +127 -0
  35. tools/dataset_converters/chase_db1.py +89 -0
  36. tools/dataset_converters/cityscapes.py +56 -0
  37. tools/dataset_converters/coco_stuff10k.py +308 -0
  38. tools/dataset_converters/coco_stuff164k.py +265 -0
  39. tools/dataset_converters/hrf.py +112 -0
  40. tools/dataset_converters/isaid.py +246 -0
  41. tools/dataset_converters/levircd.py +99 -0
  42. tools/dataset_converters/loveda.py +73 -0
  43. tools/dataset_converters/nyu.py +89 -0
  44. tools/dataset_converters/pascal_context.py +87 -0
  45. tools/dataset_converters/potsdam.py +158 -0
  46. tools/dataset_converters/refuge.py +110 -0
  47. tools/dataset_converters/stare.py +167 -0
  48. tools/dataset_converters/synapse.py +155 -0
  49. tools/dataset_converters/voc_aug.py +92 -0
  50. tools/dataset_tools/create_dataset.py +185 -0
configs/dinov2/dinov2_upernet_water.py ADDED
@@ -0,0 +1,13 @@
+_base_ = [
+    "../_base_/models/dinov2_upernet.py",
+    "../_base_/datasets/water.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/water_schedule.py",
+]
+
+data_preprocessor = dict(size=(512, 512))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=6),
+    auxiliary_head=dict(num_classes=6)
+)
configs/ktda/dinov2_b_frozen-fam-fmm.py ADDED
@@ -0,0 +1,18 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(
+        num_classes=5,
+    ),
+    auxiliary_head=dict(
+        num_classes=5,
+    ),
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]),
+)
configs/ktda/dinov2_b_frozen-fam.py ADDED
@@ -0,0 +1,13 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5)
+)
configs/ktda/experiment_a.py ADDED
@@ -0,0 +1,14 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    student_training=False,
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5)
+)
configs/ktda/experiment_aa.py ADDED
@@ -0,0 +1,46 @@
+_base_ = [
+    "../_base_/models/convnextv2_femto_vit_segformer_vegseg.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    teach_backbone=dict(
+        type="mmpretrain.VisionTransformer",
+        arch="large",
+        frozen_stages=24,
+        img_size=256,
+        patch_size=14,
+        layer_scale_init_value=1e-5,
+        out_indices=(7, 11, 15, 23),
+        out_type="featmap",
+        init_cfg=dict(
+            type="Pretrained",
+            checkpoint="checkpoints/dinov2-large.pth",
+            prefix="backbone",
+        ),
+    ),
+    fam=dict(out_channels=1024),
+    decode_head=dict(in_channels=[1024, 1024, 1024, 1024], num_classes=5),
+    data_preprocessor=data_preprocessor,
+    auxiliary_head=[
+        dict(
+            type="FCNHead",
+            in_channels=1024,
+            in_index=i,
+            channels=256,
+            num_convs=1,
+            concat_input=False,
+            dropout_ratio=0.1,
+            num_classes=5,
+            norm_cfg=dict(type="SyncBN", requires_grad=True),
+            align_corners=False,
+            loss_decode=dict(
+                type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4
+            ),
+        )
+        for i in range(4)
+    ],
+)
configs/ktda/experiment_k.py ADDED
@@ -0,0 +1,14 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5),
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768], mlp_nums=4),
+)
configs/ktda/experiment_u.py ADDED
@@ -0,0 +1,15 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5),
+    neck=None,
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]),
+)
configs/ktda/experiment_v.py ADDED
@@ -0,0 +1,26 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(
+        _delete_=True,
+        type="SegformerHead",
+        in_channels=[768, 768, 768, 768],
+        in_index=[0, 1, 2, 3],
+        channels=256,
+        dropout_ratio=0.1,
+        num_classes=5,
+        norm_cfg=dict(type="SyncBN", requires_grad=True),
+        align_corners=False,
+        loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
+    ),
+    auxiliary_head=dict(num_classes=5),
+    neck=None,
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]),
+)
configs/ktda/ktda_grass.py ADDED
@@ -0,0 +1,19 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5),
+    fmm=dict(
+        type="FMM",
+        in_channels=[768, 768, 768, 768],
+        model_type="vitBlock",
+        mlp_nums=4,
+    ),
+)
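For reference, a minimal sketch (not part of this commit) of how one of these KTDA configs could be launched with MMEngine's standard Runner. The work_dir value and the assumption that the ktda package and its _base_ configs are importable from the working directory are illustrative only.

from mmengine.config import Config
from mmengine.runner import Runner

import ktda.models    # noqa: F401  (registers DistillEncoderDecoder, FAM, FMM)
import ktda.datasets  # noqa: F401  (registers GrassDataset, L8BIOMEDataset)

cfg = Config.fromfile("configs/ktda/ktda_grass.py")
cfg.work_dir = "work_dirs/ktda_grass"  # hypothetical output directory

runner = Runner.from_cfg(cfg)  # builds model, dataloaders and schedule from the config
runner.train()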
configs/pspnet/pspnet_r101_water.py ADDED
@@ -0,0 +1,15 @@
+_base_ = [
+    "../_base_/models/pspnet_r50-d8.py",
+    "../_base_/datasets/water.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/water_schedule.py",
+]
+
+data_preprocessor = dict(size=(512, 512))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    pretrained='open-mmlab://resnet101_v1c',
+    backbone=dict(depth=101),
+    decode_head=dict(num_classes=6),
+    auxiliary_head=dict(num_classes=6)
+)
configs/pspnet/pspnet_r50.py ADDED
@@ -0,0 +1,13 @@
+_base_ = [
+    "../_base_/models/pspnet_r50-d8.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5)
+)
configs/segformer/segformer_mit-b0_water.py ADDED
@@ -0,0 +1,14 @@
+_base_ = [
+    "../_base_/models/segformer_mit-b0.py",
+    "../_base_/datasets/water.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/water_schedule.py",
+]
+
+data_preprocessor = dict(size=(512, 512))
+checkpoint = "https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth"  # noqa
+model = dict(
+    data_preprocessor=data_preprocessor,
+    backbone=dict(init_cfg=dict(type="Pretrained", checkpoint=checkpoint)),
+    decode_head=dict(num_classes=6),
+)
ktda/datasets/__init__.py ADDED
@@ -0,0 +1,7 @@
+from .grass import GrassDataset
+from .l8_biome import L8BIOMEDataset
+
+__all__ = [
+    "GrassDataset",
+    "L8BIOMEDataset"
+]
ktda/datasets/grass.py ADDED
@@ -0,0 +1,55 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+from typing import List
+
+import mmengine.fileio as fileio
+
+from mmseg.registry import DATASETS
+from mmseg.datasets import BaseSegDataset
+
+
+@DATASETS.register_module()
+class GrassDataset(BaseSegDataset):
+    """Grass segmentation dataset. The file structure should be as follows:
+
+    .. code-block:: none
+
+        ├── data
+        │   ├── grass
+        │   │   ├── img_dir
+        │   │   │   ├── train
+        │   │   │   │   ├── 0.tif
+        │   │   │   │   ├── ...
+        │   │   │   ├── val
+        │   │   │   │   ├── 9.tif
+        │   │   │   │   ├── ...
+        │   │   ├── ann_dir
+        │   │   │   ├── train
+        │   │   │   │   ├── 0.png
+        │   │   │   │   ├── ...
+        │   │   │   ├── val
+        │   │   │   │   ├── 9.png
+        │   │   │   │   ├── ...
+    """
+
+    METAINFO = dict(
+        classes=("low", "middle-low", "middle", "middle-high", "high"),
+        palette=[
+            [185, 101, 71],
+            [248, 202, 155],
+            [211, 232, 158],
+            [138, 191, 104],
+            [92, 144, 77],
+        ],
+    )
+
+    def __init__(self,
+                 img_suffix='.tif',
+                 seg_map_suffix='.png',
+                 reduce_zero_label=False,
+                 **kwargs) -> None:
+        super().__init__(
+            img_suffix=img_suffix,
+            seg_map_suffix=seg_map_suffix,
+            reduce_zero_label=reduce_zero_label,
+            **kwargs)
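As a reading aid, a hypothetical dataloader snippet showing how GrassDataset is typically wired into an MMSegmentation config; the data_root, batch size, and pipeline below are placeholders, not values taken from this commit.

train_dataloader = dict(
    batch_size=8,          # placeholder value
    num_workers=4,
    sampler=dict(type="InfiniteSampler", shuffle=True),
    dataset=dict(
        type="GrassDataset",                     # registered by the file above
        data_root="data/grass",                  # matches the docstring layout
        data_prefix=dict(img_path="img_dir/train", seg_map_path="ann_dir/train"),
        pipeline=[
            dict(type="LoadImageFromFile"),
            dict(type="LoadAnnotations"),
            dict(type="PackSegInputs"),
        ],
    ),
)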
ktda/datasets/l8_biome.py ADDED
@@ -0,0 +1,29 @@
+from mmseg.registry import DATASETS
+from mmseg.datasets import BaseSegDataset
+
+
+@DATASETS.register_module()
+class L8BIOMEDataset(BaseSegDataset):
+    METAINFO = dict(
+        classes=("Clear", "Cloud Shadow", "Thin Cloud", "Cloud"),
+        palette=[
+            [79, 253, 199],
+            [221, 53, 223],
+            [251, 255, 41],
+            [77, 2, 115],
+        ],
+    )
+
+    def __init__(
+        self,
+        img_suffix=".png",
+        seg_map_suffix=".png",
+        reduce_zero_label=False,
+        **kwargs
+    ) -> None:
+        super().__init__(
+            img_suffix=img_suffix,
+            seg_map_suffix=seg_map_suffix,
+            reduce_zero_label=reduce_zero_label,
+            **kwargs
+        )
ktda/models/__init__.py ADDED
@@ -0,0 +1,4 @@
+from .segmentors import DistillEncoderDecoder
+from .adapter import FAM, FMM
+
+__all__ = ["DistillEncoderDecoder", "FAM", "FMM"]
ktda/models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (338 Bytes).
 
ktda/models/adapter/__init__.py ADDED
@@ -0,0 +1,4 @@
+from .fam import FAM
+from .fmm import FMM
+
+__all__ = ["FAM", "FMM"]
ktda/models/adapter/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (288 Bytes).
 
ktda/models/adapter/__pycache__/fam.cpython-311.pyc ADDED
Binary file (2.86 kB).
 
ktda/models/adapter/__pycache__/fmm.cpython-311.pyc ADDED
Binary file (5.88 kB).
 
ktda/models/adapter/fam.py ADDED
@@ -0,0 +1,37 @@
+from mmseg.registry import MODELS
+from mmengine.model import BaseModule
+from torch import nn as nn
+from torch.nn import functional as F
+from timm.models.layers import trunc_normal_
+
+
+@MODELS.register_module()
+class FAM(BaseModule):
+    def __init__(self, in_channels, out_channels, output_size, init_cfg=None):
+        super().__init__(init_cfg)
+        self.convert = nn.ModuleList()
+        self.output_size = output_size
+        if isinstance(out_channels, int):
+            out_channels = [out_channels] * len(in_channels)
+        for in_channel, out_channel in zip(in_channels, out_channels):
+            self.convert.append(
+                nn.Conv2d(in_channel, out_channel, kernel_size=1),
+            )
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, (nn.Conv2d, nn.Linear)):
+            trunc_normal_(m.weight, std=0.02)
+            nn.init.constant_(m.bias, 0)
+
+    def forward(self, inputs):
+        outs = []
+        for index, x in enumerate(inputs):
+            x = self.convert[index](x)
+            x = F.interpolate(
+                x, size=(self.output_size, self.output_size), align_corners=False, mode="bilinear"
+            )
+            outs.append(x)
+        return tuple(outs)
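To illustrate what FAM does, a short self-contained sketch (not part of this commit) that runs it on random multi-scale features; the channel widths and spatial sizes are made-up example values, and it assumes the ktda package and timm are installed.

import torch
from ktda.models.adapter import FAM

# Four backbone stages with different widths, all projected to 768 channels by
# 1x1 convolutions and bilinearly resampled to a common 64x64 grid.
fam = FAM(in_channels=[96, 192, 384, 768], out_channels=768, output_size=64)

feats = [
    torch.randn(2, 96, 64, 64),
    torch.randn(2, 192, 32, 32),
    torch.randn(2, 384, 16, 16),
    torch.randn(2, 768, 8, 8),
]
outs = fam(feats)
print([tuple(o.shape) for o in outs])  # each: (2, 768, 64, 64)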
ktda/models/adapter/fmm.py ADDED
@@ -0,0 +1,109 @@
+from mmseg.registry import MODELS
+from mmengine.model import BaseModule
+from torch import nn as nn
+from torch.nn import functional as F
+from typing import Callable, Optional
+from torch import Tensor
+from timm.models.layers import trunc_normal_
+from timm.models.vision_transformer import Block as TransformerBlock
+
+
+class Mlp(nn.Module):
+    def __init__(
+        self,
+        in_features: int,
+        hidden_features: Optional[int] = None,
+        out_features: Optional[int] = None,
+        act_layer: Callable[..., nn.Module] = nn.GELU,
+        drop: float = 0.0,
+        bias: bool = True,
+    ) -> None:
+        super().__init__()
+        out_features = out_features or in_features
+        hidden_features = hidden_features or in_features
+        self.fc1 = nn.Linear(in_features, hidden_features, bias=bias)
+        self.act = act_layer()
+        self.fc2 = nn.Linear(hidden_features, out_features, bias=bias)
+        self.drop = nn.Dropout(drop)
+
+    def forward(self, x: Tensor) -> Tensor:
+        x = self.fc1(x)
+        x = self.act(x)
+        x = self.drop(x)
+        x = self.fc2(x)
+        x = self.drop(x)
+        return x
+
+
+@MODELS.register_module()
+class FMM(BaseModule):
+    def __init__(
+        self,
+        in_channels,
+        rank_dim=4,
+        mlp_nums=1,
+        model_type="mlp",
+        num_heads=8,
+        mlp_ratio=4,
+        qkv_bias=True,
+        qk_norm=False,
+        init_values=None,
+        proj_drop_rate: float = 0.0,
+        attn_drop_rate: float = 0.0,
+        init_cfg=None,
+    ):
+        super().__init__(init_cfg)
+        self.adapters = nn.ModuleList()
+        if model_type == "mlp":
+            for in_channel in in_channels:
+                mlp_list = []
+                for _ in range(mlp_nums):
+                    mlp_list.append(
+                        Mlp(
+                            in_channel,
+                            hidden_features=in_channel // rank_dim,
+                            out_features=in_channel,
+                        )
+                    )
+                mlp_model = nn.Sequential(*mlp_list)
+                self.adapters.append(mlp_model)
+
+        elif model_type == "vitBlock":
+            for in_channel in in_channels:
+                model_list = []
+                for _ in range(mlp_nums):
+                    model_list.append(
+                        TransformerBlock(
+                            in_channel,
+                            num_heads=num_heads,
+                            mlp_ratio=mlp_ratio,
+                            qkv_bias=qkv_bias,
+                            qk_norm=qk_norm,
+                            init_values=init_values,
+                            proj_drop=proj_drop_rate,
+                            attn_drop=attn_drop_rate,
+                        )
+                    )
+                self.adapters.append(nn.Sequential(*model_list))
+
+        else:
+            raise ValueError(
+                f"model_type must be one of ['mlp', 'vitBlock'], but got {model_type}"
+            )
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, (nn.Conv2d, nn.Linear)):
+            trunc_normal_(m.weight, std=0.02)
+            nn.init.constant_(m.bias, 0)
+
+    def forward(self, inputs):
+        outs = []
+        for index, x in enumerate(inputs):
+            B, C, H, W = x.shape
+            x = x.permute(0, 2, 3, 1)
+            x = x.reshape(B, -1, C)
+            x = self.adapters[index](x)
+            x = x.reshape(B, H, W, C)
+            x = x.permute(0, 3, 1, 2)
+            outs.append(x)
+        return tuple(outs)
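Likewise, an illustrative sketch (not part of this commit) of FMM applied to four feature maps: each map is flattened to tokens, passed through its per-level adapter, and reshaped back. Batch size and spatial sizes are arbitrary example values.

import torch
from ktda.models.adapter import FMM

# One low-rank MLP adapter per feature level (model_type="mlp" is the default).
fmm = FMM(in_channels=[768, 768, 768, 768], model_type="mlp", mlp_nums=1)

feats = [torch.randn(2, 768, 64, 64) for _ in range(4)]
outs = fmm(feats)  # each map: flatten to (B, H*W, C) tokens, adapt, reshape back
print(tuple(outs[0].shape))  # (2, 768, 64, 64)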
ktda/models/segmentors/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (274 Bytes).
 
ktda/models/segmentors/__pycache__/distill_encoder_decoder.cpython-311.pyc ADDED
Binary file (19.8 kB).
 
ktda/models/segmentors/distill_encoder_decoder.py ADDED
@@ -0,0 +1,382 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import logging
+from typing import List, Optional
+
+import torch.nn as nn
+import torch.nn.functional as F
+from mmengine.logging import print_log
+from torch import Tensor
+
+from mmseg.registry import MODELS
+from mmseg.utils import (
+    ConfigType,
+    OptConfigType,
+    OptMultiConfig,
+    OptSampleList,
+    SampleList,
+    add_prefix,
+)
+from mmseg.models import BaseSegmentor
+
+
+@MODELS.register_module()
+class DistillEncoderDecoder(BaseSegmentor):
+
+    def __init__(
+        self,
+        backbone: ConfigType,
+        teach_backbone: ConfigType,
+        decode_head: ConfigType,
+        neck: OptConfigType = None,
+        auxiliary_head: OptConfigType = None,
+        fam: OptConfigType = None,
+        fmm: OptConfigType = None,
+        train_cfg: OptConfigType = None,
+        test_cfg: OptConfigType = None,
+        data_preprocessor: OptConfigType = None,
+        pretrained: Optional[str] = None,
+        student_training=True,
+        temperature=1.0,
+        alpha=0.5,
+        fuse=False,
+        init_cfg: OptMultiConfig = None,
+    ):
+        super().__init__(data_preprocessor=data_preprocessor, init_cfg=init_cfg)
+
+        self.temperature = temperature
+        self.alpha = alpha
+        self.student_training = student_training
+        self.fuse = fuse
+
+        if pretrained is not None:
+            assert (
+                backbone.get("pretrained") is None
+            ), "both backbone and segmentor set pretrained weight"
+            assert (
+                teach_backbone.get("pretrained") is None
+            ), "both teach backbone and segmentor set pretrained weight"
+            backbone.pretrained = pretrained
+            teach_backbone.pretrained = pretrained
+        self.backbone = MODELS.build(backbone)
+        self.teach_backbone = MODELS.build(teach_backbone)
+        if neck is not None:
+            self.neck = MODELS.build(neck)
+
+        self.fam = nn.Identity()
+        self.fmm = nn.Identity()
+        if fam is not None:
+            self.fam = MODELS.build(fam)
+        if fmm is not None:
+            self.fmm = MODELS.build(fmm)
+        self._init_decode_head(decode_head)
+        self._init_auxiliary_head(auxiliary_head)
+
+        self.train_cfg = train_cfg
+        self.test_cfg = test_cfg
+
+        assert self.with_decode_head
+
+    def _init_decode_head(self, decode_head: ConfigType) -> None:
+        """Initialize ``decode_head``"""
+        self.decode_head = MODELS.build(decode_head)
+        self.align_corners = self.decode_head.align_corners
+        self.num_classes = self.decode_head.num_classes
+        self.out_channels = self.decode_head.out_channels
+
+    def _init_auxiliary_head(self, auxiliary_head: ConfigType) -> None:
+        """Initialize ``auxiliary_head``"""
+        if auxiliary_head is not None:
+            if isinstance(auxiliary_head, list):
+                self.auxiliary_head = nn.ModuleList()
+                for head_cfg in auxiliary_head:
+                    self.auxiliary_head.append(MODELS.build(head_cfg))
+            else:
+                self.auxiliary_head = MODELS.build(auxiliary_head)
+
+    def fuse_features(self, features):
+        x = features[0]
+        for index, feature in enumerate(features):
+            if index == 0:
+                continue
+            x += feature
+        x = [x]
+        return tuple(x)
+
+    def extract_feat(self, inputs: Tensor) -> List[Tensor]:
+        """Extract features from images."""
+        x = self.backbone(inputs)
+        x = self.fam(x)
+        if self.fuse:
+            x = self.fuse_features(x)
+        if self.with_neck:
+            x = self.neck(x)
+        x = self.fmm(x)
+        return x
+
+    def encode_decode(self, inputs: Tensor, batch_img_metas: List[dict]) -> Tensor:
+        """Encode images with backbone and decode into a semantic segmentation
+        map of the same size as input."""
+        x = self.extract_feat(inputs)
+        seg_logits = self.decode_head.predict(x, batch_img_metas, self.test_cfg)
+
+        return seg_logits
+
+    def _decode_head_forward_train(
+        self, inputs: List[Tensor], data_samples: SampleList
+    ) -> dict:
+        """Run forward function and calculate loss for decode head in
+        training."""
+        losses = dict()
+        loss_decode = self.decode_head.loss(inputs, data_samples, self.train_cfg)
+
+        losses.update(add_prefix(loss_decode, "decode"))
+        return losses
+
+    def _auxiliary_head_forward_train(
+        self, inputs: List[Tensor], data_samples: SampleList
+    ) -> dict:
+        """Run forward function and calculate loss for auxiliary head in
+        training."""
+        losses = dict()
+        if isinstance(self.auxiliary_head, nn.ModuleList):
+            for idx, aux_head in enumerate(self.auxiliary_head):
+                loss_aux = aux_head.loss(inputs, data_samples, self.train_cfg)
+                for key in loss_aux.keys():
+                    loss_aux[key] = loss_aux[key] / len(self.auxiliary_head)
+                losses.update(add_prefix(loss_aux, f"aux_{idx}"))
+        else:
+            loss_aux = self.auxiliary_head.loss(inputs, data_samples, self.train_cfg)
+            losses.update(add_prefix(loss_aux, "aux"))
+
+        return losses
+
+    def calculate_diltill_loss(self, inputs):
+        student_feats = self.backbone(inputs)
+        student_feats = self.fam(student_feats)
+        teach_feats = self.teach_backbone(inputs)
+
+        if self.fuse:
+            student_feats = self.fuse_features(student_feats)
+            teach_feats = self.fuse_features(teach_feats)
+
+        total_loss = 0.0
+        for student_feat, teach_feat in zip(student_feats, teach_feats):
+            student_prob = F.softmax(student_feat / self.temperature, dim=-1)
+            teach_prob = F.softmax(teach_feat / self.temperature, dim=-1)
+            kl_loss = F.kl_div(
+                student_prob.log(), teach_prob, reduction="batchmean"
+            ) * (self.temperature**2)
+            mse_loss = F.mse_loss(student_feat, teach_feat, reduction="mean")
+            loss = self.alpha * kl_loss + (1 - self.alpha) * mse_loss
+            total_loss += loss
+
+        avg_loss = total_loss / len(student_feats)
+        if self.alpha == 0:
+            avg_loss = avg_loss * 0.5
+        return avg_loss
+
+    def loss(self, inputs: Tensor, data_samples: SampleList) -> dict:
+        """Calculate losses from a batch of inputs and data samples.
+
+        Args:
+            inputs (Tensor): Input images.
+            data_samples (list[:obj:`SegDataSample`]): The seg data samples.
+                It usually includes information such as `metainfo` and
+                `gt_sem_seg`.
+
+        Returns:
+            dict[str, Tensor]: a dictionary of loss components
+        """
+
+        x = self.extract_feat(inputs)
+
+        losses = dict()
+
+        loss_decode = self._decode_head_forward_train(x, data_samples)
+        losses.update(loss_decode)
+        if self.student_training:
+            losses["distill_loss"] = self.calculate_diltill_loss(inputs)
+        if self.with_auxiliary_head:
+            loss_aux = self._auxiliary_head_forward_train(x, data_samples)
+            losses.update(loss_aux)
+
+        return losses
+
+    def predict(self, inputs: Tensor, data_samples: OptSampleList = None) -> SampleList:
+        """Predict results from a batch of inputs and data samples with post-
+        processing.
+
+        Args:
+            inputs (Tensor): Inputs with shape (N, C, H, W).
+            data_samples (List[:obj:`SegDataSample`], optional): The seg data
+                samples. It usually includes information such as `metainfo`
+                and `gt_sem_seg`.
+
+        Returns:
+            list[:obj:`SegDataSample`]: Segmentation results of the
+            input images. Each SegDataSample usually contain:
+
+            - ``pred_sem_seg``(PixelData): Prediction of semantic segmentation.
+            - ``seg_logits``(PixelData): Predicted logits of semantic
+              segmentation before normalization.
+        """
+        if data_samples is not None:
+            batch_img_metas = [data_sample.metainfo for data_sample in data_samples]
+        else:
+            batch_img_metas = [
+                dict(
+                    ori_shape=inputs.shape[2:],
+                    img_shape=inputs.shape[2:],
+                    pad_shape=inputs.shape[2:],
+                    padding_size=[0, 0, 0, 0],
+                )
+            ] * inputs.shape[0]
+
+        seg_logits = self.inference(inputs, batch_img_metas)
+
+        return self.postprocess_result(seg_logits, data_samples)
+
+    def _forward(self, inputs: Tensor, data_samples: OptSampleList = None) -> Tensor:
+        """Network forward process.
+
+        Args:
+            inputs (Tensor): Inputs with shape (N, C, H, W).
+            data_samples (List[:obj:`SegDataSample`]): The seg
+                data samples. It usually includes information such
+                as `metainfo` and `gt_sem_seg`.
+
+        Returns:
+            Tensor: Forward output of model without any post-processes.
+        """
+        x = self.extract_feat(inputs)
+        return self.decode_head.forward(x)
+
+    def slide_inference(self, inputs: Tensor, batch_img_metas: List[dict]) -> Tensor:
+        """Inference by sliding-window with overlap.
+
+        If h_crop > h_img or w_crop > w_img, the small patch will be used to
+        decode without padding.
+
+        Args:
+            inputs (tensor): the tensor should have a shape NxCxHxW,
+                which contains all images in the batch.
+            batch_img_metas (List[dict]): List of image metainfo where each may
+                also contain: 'img_shape', 'scale_factor', 'flip', 'img_path',
+                'ori_shape', and 'pad_shape'.
+                For details on the values of these keys see
+                `mmseg/datasets/pipelines/formatting.py:PackSegInputs`.
+
+        Returns:
+            Tensor: The segmentation results, seg_logits from model of each
+                input image.
+        """
+
+        h_stride, w_stride = self.test_cfg.stride
+        h_crop, w_crop = self.test_cfg.crop_size
+        batch_size, _, h_img, w_img = inputs.size()
+        out_channels = self.out_channels
+        h_grids = max(h_img - h_crop + h_stride - 1, 0) // h_stride + 1
+        w_grids = max(w_img - w_crop + w_stride - 1, 0) // w_stride + 1
+        preds = inputs.new_zeros((batch_size, out_channels, h_img, w_img))
+        count_mat = inputs.new_zeros((batch_size, 1, h_img, w_img))
+        for h_idx in range(h_grids):
+            for w_idx in range(w_grids):
+                y1 = h_idx * h_stride
+                x1 = w_idx * w_stride
+                y2 = min(y1 + h_crop, h_img)
+                x2 = min(x1 + w_crop, w_img)
+                y1 = max(y2 - h_crop, 0)
+                x1 = max(x2 - w_crop, 0)
+                crop_img = inputs[:, :, y1:y2, x1:x2]
+                # change the image shape to patch shape
+                batch_img_metas[0]["img_shape"] = crop_img.shape[2:]
+                # the output of encode_decode is seg logits tensor map
+                # with shape [N, C, H, W]
+                crop_seg_logit = self.encode_decode(crop_img, batch_img_metas)
+                preds += F.pad(
+                    crop_seg_logit,
+                    (
+                        int(x1),
+                        int(preds.shape[3] - x2),
+                        int(y1),
+                        int(preds.shape[2] - y2),
+                    ),
+                )
+
+                count_mat[:, :, y1:y2, x1:x2] += 1
+        assert (count_mat == 0).sum() == 0
+        seg_logits = preds / count_mat
+
+        return seg_logits
+
+    def whole_inference(self, inputs: Tensor, batch_img_metas: List[dict]) -> Tensor:
+        """Inference with full image.
+
+        Args:
+            inputs (Tensor): The tensor should have a shape NxCxHxW, which
+                contains all images in the batch.
+            batch_img_metas (List[dict]): List of image metainfo where each may
+                also contain: 'img_shape', 'scale_factor', 'flip', 'img_path',
+                'ori_shape', and 'pad_shape'.
+                For details on the values of these keys see
+                `mmseg/datasets/pipelines/formatting.py:PackSegInputs`.
+
+        Returns:
+            Tensor: The segmentation results, seg_logits from model of each
+                input image.
+        """
+
+        seg_logits = self.encode_decode(inputs, batch_img_metas)
+
+        return seg_logits
+
+    def inference(self, inputs: Tensor, batch_img_metas: List[dict]) -> Tensor:
+        """Inference with slide/whole style.
+
+        Args:
+            inputs (Tensor): The input image of shape (N, 3, H, W).
+            batch_img_metas (List[dict]): List of image metainfo where each may
+                also contain: 'img_shape', 'scale_factor', 'flip', 'img_path',
+                'ori_shape', 'pad_shape', and 'padding_size'.
+                For details on the values of these keys see
+                `mmseg/datasets/pipelines/formatting.py:PackSegInputs`.
+
+        Returns:
+            Tensor: The segmentation results, seg_logits from model of each
+                input image.
+        """
+        assert self.test_cfg.get("mode", "whole") in ["slide", "whole"], (
+            f'Only "slide" or "whole" test mode are supported, but got '
+            f'{self.test_cfg["mode"]}.'
+        )
+        ori_shape = batch_img_metas[0]["ori_shape"]
+        if not all(_["ori_shape"] == ori_shape for _ in batch_img_metas):
+            print_log(
+                "Image shapes are different in the batch.",
+                logger="current",
+                level=logging.WARN,
+            )
+        if self.test_cfg.mode == "slide":
+            seg_logit = self.slide_inference(inputs, batch_img_metas)
+        else:
+            seg_logit = self.whole_inference(inputs, batch_img_metas)
+
+        return seg_logit
+
+    def aug_test(self, inputs, batch_img_metas, rescale=True):
+        """Test with augmentations.
+
+        Only rescale=True is supported.
+        """
+        # aug_test rescale all imgs back to ori_shape for now
+        assert rescale
+        # to save memory, we get augmented seg logit inplace
+        seg_logit = self.inference(inputs[0], batch_img_metas[0], rescale)
+        for i in range(1, len(inputs)):
+            cur_seg_logit = self.inference(inputs[i], batch_img_metas[i], rescale)
+            seg_logit += cur_seg_logit
+        seg_logit /= len(inputs)
+        seg_pred = seg_logit.argmax(dim=1)
+        # unravel batch dim
+        seg_pred = list(seg_pred)
+        return seg_pred
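To make the distillation objective easier to follow, here is a standalone restatement (not part of this commit) of the per-level term computed in calculate_diltill_loss above: a temperature-softened KL divergence blended with an MSE feature-matching term. Tensor shapes are example values.

import torch
import torch.nn.functional as F


def feature_distill_loss(student_feat, teach_feat, temperature=1.0, alpha=0.5):
    # KL divergence between temperature-softened distributions over the last dim,
    # scaled by T^2, matching the loop body above.
    student_prob = F.softmax(student_feat / temperature, dim=-1)
    teach_prob = F.softmax(teach_feat / temperature, dim=-1)
    kl = F.kl_div(student_prob.log(), teach_prob, reduction="batchmean") * temperature**2
    # Plain feature-regression term.
    mse = F.mse_loss(student_feat, teach_feat, reduction="mean")
    return alpha * kl + (1 - alpha) * mse


student = torch.randn(2, 768, 64, 64)  # example student (FAM-aligned) feature
teacher = torch.randn(2, 768, 64, 64)  # example frozen-teacher feature
print(feature_distill_loss(student, teacher).item())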
requirements/docs.txt ADDED
@@ -0,0 +1,7 @@
+docutils==0.16.0
+myst-parser
+-e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
+sphinx==4.0.2
+sphinx_copybutton
+sphinx_markdown_tables
+urllib3<2.0.0
requirements/optional.txt ADDED
@@ -0,0 +1,22 @@
+cityscapesscripts
+-e git+https://github.com/openai/CLIP.git@main#egg=clip
+
+# for vpd model
+diffusers
+einops==0.3.0
+imageio==2.9.0
+imageio-ffmpeg==0.4.2
+invisible-watermark
+kornia==0.6
+-e git+https://github.com/CompVis/stable-diffusion@21f890f#egg=latent-diffusion
+nibabel
+omegaconf==2.1.1
+pudb==2019.2
+pytorch-lightning==1.4.2
+streamlit>=0.73.1
+-e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers
+test-tube>=0.7.5
+timm
+torch-fidelity==0.3.0
+torchmetrics==0.6.0
+transformers==4.19.2
requirements/runtime.txt ADDED
@@ -0,0 +1,5 @@
+matplotlib
+numpy
+packaging
+prettytable
+scipy
tools/analysis_tools/analyze_logs.py ADDED
@@ -0,0 +1,130 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+"""Modified from https://github.com/open-
+mmlab/mmdetection/blob/master/tools/analysis_tools/analyze_logs.py."""
+import argparse
+import json
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+
+def plot_curve(log_dicts, args):
+    if args.backend is not None:
+        plt.switch_backend(args.backend)
+    sns.set_style(args.style)
+    # if legend is None, use {filename}_{key} as legend
+    legend = args.legend
+    if legend is None:
+        legend = []
+        for json_log in args.json_logs:
+            for metric in args.keys:
+                legend.append(f'{json_log}_{metric}')
+    assert len(legend) == (len(args.json_logs) * len(args.keys))
+    metrics = args.keys
+
+    num_metrics = len(metrics)
+    for i, log_dict in enumerate(log_dicts):
+        epochs = list(log_dict.keys())
+        for j, metric in enumerate(metrics):
+            print(f'plot curve of {args.json_logs[i]}, metric is {metric}')
+            plot_epochs = []
+            plot_iters = []
+            plot_values = []
+            # In some log files exist lines of validation,
+            # `mode` list is used to only collect iter number
+            # of training line.
+            for epoch in epochs:
+                epoch_logs = log_dict[epoch]
+                if metric not in epoch_logs.keys():
+                    continue
+                if metric in ['mIoU', 'mAcc', 'aAcc']:
+                    plot_epochs.append(epoch)
+                    plot_values.append(epoch_logs[metric][0])
+                else:
+                    for idx in range(len(epoch_logs[metric])):
+                        plot_iters.append(epoch_logs['step'][idx])
+                        plot_values.append(epoch_logs[metric][idx])
+            ax = plt.gca()
+            label = legend[i * num_metrics + j]
+            if metric in ['mIoU', 'mAcc', 'aAcc']:
+                ax.set_xticks(plot_epochs)
+                plt.xlabel('step')
+                plt.plot(plot_epochs, plot_values, label=label, marker='o')
+            else:
+                plt.xlabel('iter')
+                plt.plot(plot_iters, plot_values, label=label, linewidth=0.5)
+            plt.legend()
+        if args.title is not None:
+            plt.title(args.title)
+    if args.out is None:
+        plt.show()
+    else:
+        print(f'save curve to: {args.out}')
+        plt.savefig(args.out)
+        plt.cla()
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='Analyze Json Log')
+    parser.add_argument(
+        'json_logs',
+        type=str,
+        nargs='+',
+        help='path of train log in json format')
+    parser.add_argument(
+        '--keys',
+        type=str,
+        nargs='+',
+        default=['mIoU'],
+        help='the metric that you want to plot')
+    parser.add_argument('--title', type=str, help='title of figure')
+    parser.add_argument(
+        '--legend',
+        type=str,
+        nargs='+',
+        default=None,
+        help='legend of each plot')
+    parser.add_argument(
+        '--backend', type=str, default=None, help='backend of plt')
+    parser.add_argument(
+        '--style', type=str, default='dark', help='style of plt')
+    parser.add_argument('--out', type=str, default=None)
+    args = parser.parse_args()
+    return args
+
+
+def load_json_logs(json_logs):
+    # load and convert json_logs to log_dict, key is step, value is a sub dict
+    # keys of sub dict is different metrics
+    # value of sub dict is a list of corresponding values of all iterations
+    log_dicts = [dict() for _ in json_logs]
+    prev_step = 0
+    for json_log, log_dict in zip(json_logs, log_dicts):
+        with open(json_log) as log_file:
+            for line in log_file:
+                log = json.loads(line.strip())
+                # the final step in json file is 0.
+                if 'step' in log and log['step'] != 0:
+                    step = log['step']
+                    prev_step = step
+                else:
+                    step = prev_step
+                if step not in log_dict:
+                    log_dict[step] = defaultdict(list)
+                for k, v in log.items():
+                    log_dict[step][k].append(v)
+    return log_dicts
+
+
+def main():
+    args = parse_args()
+    json_logs = args.json_logs
+    for json_log in json_logs:
+        assert json_log.endswith('.json')
+    log_dicts = load_json_logs(json_logs)
+    plot_curve(log_dicts, args)
+
+
+if __name__ == '__main__':
+    main()
tools/analysis_tools/benchmark.py ADDED
@@ -0,0 +1,121 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os.path as osp
+import time
+
+import numpy as np
+import torch
+from mmengine import Config
+from mmengine.fileio import dump
+from mmengine.model.utils import revert_sync_batchnorm
+from mmengine.registry import init_default_scope
+from mmengine.runner import Runner, load_checkpoint
+from mmengine.utils import mkdir_or_exist
+
+from mmseg.registry import MODELS
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='MMSeg benchmark a model')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument('checkpoint', help='checkpoint file')
+    parser.add_argument(
+        '--log-interval', type=int, default=50, help='interval of logging')
+    parser.add_argument(
+        '--work-dir',
+        help=('if specified, the results will be dumped '
+              'into the directory as json'))
+    parser.add_argument('--repeat-times', type=int, default=1)
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = parse_args()
+    cfg = Config.fromfile(args.config)
+
+    init_default_scope(cfg.get('default_scope', 'mmseg'))
+
+    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
+    if args.work_dir is not None:
+        mkdir_or_exist(osp.abspath(args.work_dir))
+        json_file = osp.join(args.work_dir, f'fps_{timestamp}.json')
+    else:
+        # use config filename as default work_dir if cfg.work_dir is None
+        work_dir = osp.join('./work_dirs',
+                            osp.splitext(osp.basename(args.config))[0])
+        mkdir_or_exist(osp.abspath(work_dir))
+        json_file = osp.join(work_dir, f'fps_{timestamp}.json')
+
+    repeat_times = args.repeat_times
+    # set cudnn_benchmark
+    torch.backends.cudnn.benchmark = False
+    cfg.model.pretrained = None
+
+    benchmark_dict = dict(config=args.config, unit='img / s')
+    overall_fps_list = []
+    cfg.test_dataloader.batch_size = 1
+    for time_index in range(repeat_times):
+        print(f'Run {time_index + 1}:')
+        # build the dataloader
+        data_loader = Runner.build_dataloader(cfg.test_dataloader)
+
+        # build the model and load checkpoint
+        cfg.model.train_cfg = None
+        model = MODELS.build(cfg.model)
+
+        if 'checkpoint' in args and osp.exists(args.checkpoint):
+            load_checkpoint(model, args.checkpoint, map_location='cpu')
+
+        if torch.cuda.is_available():
+            model = model.cuda()
+
+        model = revert_sync_batchnorm(model)
+
+        model.eval()
+
+        # the first several iterations may be very slow so skip them
+        num_warmup = 5
+        pure_inf_time = 0
+        total_iters = 200
+
+        # benchmark with 200 batches and take the average
+        for i, data in enumerate(data_loader):
+            data = model.data_preprocessor(data, True)
+            inputs = data['inputs']
+            data_samples = data['data_samples']
+            if torch.cuda.is_available():
+                torch.cuda.synchronize()
+            start_time = time.perf_counter()
+
+            with torch.no_grad():
+                model(inputs, data_samples, mode='predict')
+
+            if torch.cuda.is_available():
+                torch.cuda.synchronize()
+            elapsed = time.perf_counter() - start_time
+
+            if i >= num_warmup:
+                pure_inf_time += elapsed
+                if (i + 1) % args.log_interval == 0:
+                    fps = (i + 1 - num_warmup) / pure_inf_time
+                    print(f'Done image [{i + 1:<3}/ {total_iters}], '
+                          f'fps: {fps:.2f} img / s')
+
+            if (i + 1) == total_iters:
+                fps = (i + 1 - num_warmup) / pure_inf_time
+                print(f'Overall fps: {fps:.2f} img / s\n')
+                benchmark_dict[f'overall_fps_{time_index + 1}'] = round(fps, 2)
+                overall_fps_list.append(fps)
+                break
+    benchmark_dict['average_fps'] = round(np.mean(overall_fps_list), 2)
+    benchmark_dict['fps_variance'] = round(np.var(overall_fps_list), 4)
+    print(f'Average fps of {repeat_times} evaluations: '
+          f'{benchmark_dict["average_fps"]}')
+    print(f'The variance of {repeat_times} evaluations: '
+          f'{benchmark_dict["fps_variance"]}')
+    dump(benchmark_dict, json_file, indent=4)
+
+
+if __name__ == '__main__':
+    main()
tools/analysis_tools/confusion_matrix.py ADDED
@@ -0,0 +1,197 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.ticker import MultipleLocator
+from mmengine.config import Config, DictAction
+from mmengine.registry import init_default_scope
+from mmengine.utils import mkdir_or_exist, progressbar
+from PIL import Image
+
+from mmseg.registry import DATASETS
+
+init_default_scope('mmseg')
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Generate confusion matrix from segmentation results')
+    parser.add_argument('config', help='test config file path')
+    parser.add_argument(
+        'prediction_path', help='prediction path where test folder result')
+    parser.add_argument(
+        'save_dir', help='directory where confusion matrix will be saved')
+    parser.add_argument(
+        '--show', action='store_true', help='show confusion matrix')
+    parser.add_argument(
+        '--color-theme',
+        default='winter',
+        help='theme of the matrix color map')
+    parser.add_argument(
+        '--title',
+        default='Normalized Confusion Matrix',
+        help='title of the matrix color map')
+    parser.add_argument(
+        '--cfg-options',
+        nargs='+',
+        action=DictAction,
+        help='override some settings in the used config, the key-value pair '
+        'in xxx=yyy format will be merged into config file. If the value to '
+        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
+        'Note that the quotation marks are necessary and that no white space '
+        'is allowed.')
+    args = parser.parse_args()
+    return args
+
+
+def calculate_confusion_matrix(dataset, results):
+    """Calculate the confusion matrix.
+
+    Args:
+        dataset (Dataset): Test or val dataset.
+        results (list[ndarray]): A list of segmentation results in each image.
+    """
+    n = len(dataset.METAINFO['classes'])
+    confusion_matrix = np.zeros(shape=[n, n])
+    assert len(dataset) == len(results)
+    ignore_index = dataset.ignore_index
+    reduce_zero_label = dataset.reduce_zero_label
+    prog_bar = progressbar.ProgressBar(len(results))
+    for idx, per_img_res in enumerate(results):
+        res_segm = per_img_res
+        gt_segm = dataset[idx]['data_samples'] \
+            .gt_sem_seg.data.squeeze().numpy().astype(np.uint8)
+        gt_segm, res_segm = gt_segm.flatten(), res_segm.flatten()
+        if reduce_zero_label:
+            gt_segm = gt_segm - 1
+        to_ignore = gt_segm == ignore_index
+
+        gt_segm, res_segm = gt_segm[~to_ignore], res_segm[~to_ignore]
+        inds = n * gt_segm + res_segm
+        mat = np.bincount(inds, minlength=n**2).reshape(n, n)
+        confusion_matrix += mat
+        prog_bar.update()
+    return confusion_matrix
+
+
+def plot_confusion_matrix(confusion_matrix,
+                          labels,
+                          save_dir=None,
+                          show=True,
+                          title='Normalized Confusion Matrix',
+                          color_theme='OrRd'):
+    """Draw confusion matrix with matplotlib.
+
+    Args:
+        confusion_matrix (ndarray): The confusion matrix.
+        labels (list[str]): List of class names.
+        save_dir (str|optional): If set, save the confusion matrix plot to the
+            given path. Default: None.
+        show (bool): Whether to show the plot. Default: True.
+        title (str): Title of the plot. Default: `Normalized Confusion Matrix`.
+        color_theme (str): Theme of the matrix color map. Default: `winter`.
+    """
+    # normalize the confusion matrix
+    per_label_sums = confusion_matrix.sum(axis=1)[:, np.newaxis]
+    confusion_matrix = \
+        confusion_matrix.astype(np.float32) / per_label_sums * 100
+
+    num_classes = len(labels)
+    fig, ax = plt.subplots(
+        figsize=(2 * num_classes, 2 * num_classes * 0.8), dpi=300)
+    cmap = plt.get_cmap(color_theme)
+    im = ax.imshow(confusion_matrix, cmap=cmap)
+    colorbar = plt.colorbar(mappable=im, ax=ax)
+    colorbar.ax.tick_params(labelsize=20)  # set the font size of the colorbar tick labels
+
+    title_font = {'weight': 'bold', 'size': 20}
+    ax.set_title(title, fontdict=title_font)
+    label_font = {'size': 40}
+    plt.ylabel('Ground Truth Label', fontdict=label_font)
+    plt.xlabel('Prediction Label', fontdict=label_font)
+
+    # draw locator
+    xmajor_locator = MultipleLocator(1)
+    xminor_locator = MultipleLocator(0.5)
+    ax.xaxis.set_major_locator(xmajor_locator)
+    ax.xaxis.set_minor_locator(xminor_locator)
+    ymajor_locator = MultipleLocator(1)
+    yminor_locator = MultipleLocator(0.5)
+    ax.yaxis.set_major_locator(ymajor_locator)
+    ax.yaxis.set_minor_locator(yminor_locator)
+
+    # draw grid
+    ax.grid(True, which='minor', linestyle='-')
+
+    # draw label
+    ax.set_xticks(np.arange(num_classes))
+    ax.set_yticks(np.arange(num_classes))
+    ax.set_xticklabels(labels, fontsize=20)
+    ax.set_yticklabels(labels, fontsize=20)
+
+    ax.tick_params(
+        axis='x', bottom=False, top=True, labelbottom=False, labeltop=True)
+    plt.setp(
+        ax.get_xticklabels(), rotation=45, ha='left', rotation_mode='anchor')
+
+    # draw confusion matrix value
+    for i in range(num_classes):
+        for j in range(num_classes):
+            ax.text(
+                j,
+                i,
+                '{}%'.format(
+                    round(confusion_matrix[i, j], 2
+                          ) if not np.isnan(confusion_matrix[i, j]) else -1),
+                ha='center',
+                va='center',
+                color='k',
+                size=20)
+
+    ax.set_ylim(len(confusion_matrix) - 0.5, -0.5)  # matplotlib>3.1.1
+
+    fig.tight_layout()
+    if save_dir is not None:
+        mkdir_or_exist(save_dir)
+        plt.savefig(
+            os.path.join(save_dir, 'confusion_matrix.png'), format='png')
+    if show:
+        plt.show()
+
+
+def main():
+    args = parse_args()
+
+    cfg = Config.fromfile(args.config)
+    if args.cfg_options is not None:
+        cfg.merge_from_dict(args.cfg_options)
+
+    results = []
+    for img in sorted(os.listdir(args.prediction_path)):
+        img = os.path.join(args.prediction_path, img)
+        image = Image.open(img)
+        image = np.copy(image)
+        results.append(image)
+
+    assert isinstance(results, list)
+    if isinstance(results[0], np.ndarray):
+        pass
+    else:
+        raise TypeError('invalid type of prediction results')
+
+    dataset = DATASETS.build(cfg.test_dataloader.dataset)
+    confusion_matrix = calculate_confusion_matrix(dataset, results)
+    plot_confusion_matrix(
+        confusion_matrix,
+        dataset.METAINFO['classes'],
+        save_dir=args.save_dir,
+        show=args.show,
+        title=args.title,
+        color_theme=args.color_theme)
+
+
+if __name__ == '__main__':
+    main()
tools/analysis_tools/get_flops.py ADDED
@@ -0,0 +1,126 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import argparse
+import tempfile
+from pathlib import Path
+
+import torch
+from mmengine import Config, DictAction
+from mmengine.logging import MMLogger
+from mmengine.model import revert_sync_batchnorm
+from mmengine.registry import init_default_scope
+
+from mmseg.models import BaseSegmentor
+from mmseg.registry import MODELS
+from mmseg.structures import SegDataSample
+from vegseg import models
+try:
+    from mmengine.analysis import get_model_complexity_info
+    from mmengine.analysis.print_helper import _format_size
+except ImportError:
+    raise ImportError('Please upgrade mmengine >= 0.6.0 to use this script.')
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Get the FLOPs of a segmentor')
+    parser.add_argument('config', help='train config file path')
+    parser.add_argument(
+        '--shape',
+        type=int,
+        nargs='+',
+        default=[2048, 1024],
+        help='input image size')
+    parser.add_argument(
+        '--cfg-options',
+        nargs='+',
+        action=DictAction,
+        help='override some settings in the used config, the key-value pair '
+        'in xxx=yyy format will be merged into config file. If the value to '
+        'be overwritten is a list, it should be like key="[a,b]" or key=a,b '
+        'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" '
+        'Note that the quotation marks are necessary and that no white space '
+        'is allowed.')
+    args = parser.parse_args()
+    return args
+
+
+def inference(args: argparse.Namespace, logger: MMLogger) -> dict:
+    config_name = Path(args.config)
+
+    if not config_name.exists():
+        logger.error(f'Config file {config_name} does not exist')
+
+    cfg: Config = Config.fromfile(config_name)
+    cfg.work_dir = tempfile.TemporaryDirectory().name
+    cfg.log_level = 'WARN'
+    if args.cfg_options is not None:
+        cfg.merge_from_dict(args.cfg_options)
+
+    init_default_scope(cfg.get('scope', 'mmseg'))
+
+    if len(args.shape) == 1:
+        input_shape = (3, args.shape[0], args.shape[0])
+    elif len(args.shape) == 2:
+        input_shape = (3, ) + tuple(args.shape)
+    else:
+        raise ValueError('invalid input shape')
+    result = {}
+
+    model: BaseSegmentor = MODELS.build(cfg.model)
+    if hasattr(model, 'auxiliary_head'):
+        model.auxiliary_head = None
+    if hasattr(model, 'teach_backbone'):
+        model.teach_backbone = None
+    if torch.cuda.is_available():
+        model.cuda()
+    model = revert_sync_batchnorm(model)
+    result['ori_shape'] = input_shape[-2:]
+    result['pad_shape'] = input_shape[-2:]
+    data_batch = {
+        'inputs': [torch.rand(input_shape)],
+        'data_samples': [SegDataSample(metainfo=result)]
+    }
+    data = model.data_preprocessor(data_batch)
+    model.eval()
+    if cfg.model.decode_head.type in ['MaskFormerHead', 'Mask2FormerHead']:
+        # TODO: Support MaskFormer and Mask2Former
+        raise NotImplementedError('MaskFormer and Mask2Former are not '
+                                  'supported yet.')
+    outputs = get_model_complexity_info(
+        model,
+        input_shape=None,
+        inputs=data['inputs'],
+        show_table=False,
+        show_arch=False)
+    result['flops'] = _format_size(outputs['flops'])
+    result['params'] = _format_size(outputs['params'])
+    result['compute_type'] = 'direct: randomly generate a picture'
+    return result
+
+
+def main():
+
+    args = parse_args()
+    logger = MMLogger.get_instance(name='MMLogger')
+
+    result = inference(args, logger)
+    split_line = '=' * 30
+    ori_shape = result['ori_shape']
+    pad_shape = result['pad_shape']
+    flops = result['flops']
+    params = result['params']
+    compute_type = result['compute_type']
+
+    if pad_shape != ori_shape:
+        print(f'{split_line}\nUse size divisor set input shape '
+              f'from {ori_shape} to {pad_shape}')
+    print(f'{split_line}\nCompute type: {compute_type}\n'
+          f'Input shape: {pad_shape}\nFlops: {flops}\n'
+          f'Params: {params}\n{split_line}')
+    print('!!!Please be cautious if you use the results in papers. '
+          'You may need to check if all ops are supported and verify '
+          'that the flops computation is correct.')
+
+
+if __name__ == '__main__':
+    main()
tools/analysis_tools/visualization_cam.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ """Use the pytorch-grad-cam tool to visualize Class Activation Maps (CAM).
3
+
4
+ requirement: pip install grad-cam
5
+ """
6
+
7
+ from argparse import ArgumentParser
8
+
9
+ import numpy as np
10
+ import torch
11
+ import torch.nn.functional as F
12
+ from mmengine import Config
13
+ from mmengine.model import revert_sync_batchnorm
14
+ from PIL import Image
15
+ from pytorch_grad_cam import GradCAM
16
+ from pytorch_grad_cam.utils.image import preprocess_image, show_cam_on_image
17
+
18
+ from mmseg.apis import inference_model, init_model, show_result_pyplot
19
+ from mmseg.utils import register_all_modules
20
+
21
+
22
+ class SemanticSegmentationTarget:
23
+ """wrap the model.
24
+
25
+ requirement: pip install grad-cam
26
+
27
+ Args:
28
+ category (int): Visualization class.
29
+ mask (ndarray): Mask of class.
30
+ size (tuple): Image size.
31
+ """
32
+
33
+ def __init__(self, category, mask, size):
34
+ self.category = category
35
+ self.mask = torch.from_numpy(mask)
36
+ self.size = size
37
+ if torch.cuda.is_available():
38
+ self.mask = self.mask.cuda()
39
+
40
+ def __call__(self, model_output):
41
+ model_output = torch.unsqueeze(model_output, dim=0)
42
+ model_output = F.interpolate(
43
+ model_output, size=self.size, mode='bilinear')
44
+ model_output = torch.squeeze(model_output, dim=0)
45
+
46
+ return (model_output[self.category, :, :] * self.mask).sum()
47
+
48
+
49
+ def main():
50
+ parser = ArgumentParser()
51
+ parser.add_argument('img', help='Image file')
52
+ parser.add_argument('config', help='Config file')
53
+ parser.add_argument('checkpoint', help='Checkpoint file')
54
+ parser.add_argument(
55
+ '--out-file',
56
+ default='prediction.png',
57
+ help='Path to output prediction file')
58
+ parser.add_argument(
59
+ '--cam-file', default='vis_cam.png', help='Path to output cam file')
60
+ parser.add_argument(
61
+ '--target-layers',
62
+ default='backbone.layer4[2]',
63
+ help='Target layers to visualize CAM')
64
+ parser.add_argument(
65
+ '--category-index', default='7', help='Category to visualize CAM')
66
+ parser.add_argument(
67
+ '--device', default='cuda:0', help='Device used for inference')
68
+ args = parser.parse_args()
69
+
70
+ # build the model from a config file and a checkpoint file
71
+ register_all_modules()
72
+ model = init_model(args.config, args.checkpoint, device=args.device)
73
+ if args.device == 'cpu':
74
+ model = revert_sync_batchnorm(model)
75
+
76
+ # test a single image
77
+ result = inference_model(model, args.img)
78
+
79
+ # show the results
80
+ show_result_pyplot(
81
+ model,
82
+ args.img,
83
+ result,
84
+ draw_gt=False,
85
+ show=False if args.out_file is not None else True,
86
+ out_file=args.out_file)
87
+
88
+ # result data conversion
89
+ prediction_data = result.pred_sem_seg.data
90
+ pre_np_data = prediction_data.cpu().numpy().squeeze(0)
91
+
92
+ target_layers = args.target_layers
93
+ target_layers = [eval(f'model.{target_layers}')]
94
+
95
+ category = int(args.category_index)
96
+ mask_float = np.float32(pre_np_data == category)
97
+
98
+ # data processing
99
+ image = np.array(Image.open(args.img).convert('RGB'))
100
+ height, width = image.shape[0], image.shape[1]
101
+ rgb_img = np.float32(image) / 255
102
+ config = Config.fromfile(args.config)
103
+ image_mean = config.data_preprocessor['mean']
104
+ image_std = config.data_preprocessor['std']
105
+ input_tensor = preprocess_image(
106
+ rgb_img,
107
+ mean=[x / 255 for x in image_mean],
108
+ std=[x / 255 for x in image_std])
109
+
110
+ # Grad CAM(Class Activation Maps)
111
+ # Can also be LayerCAM, XGradCAM, GradCAMPlusPlus, EigenCAM, EigenGradCAM
112
+ targets = [
113
+ SemanticSegmentationTarget(category, mask_float, (height, width))
114
+ ]
115
+ with GradCAM(
116
+ model=model,
117
+ target_layers=target_layers,
118
+ use_cuda=torch.cuda.is_available()) as cam:
119
+ grayscale_cam = cam(input_tensor=input_tensor, targets=targets)[0, :]
120
+ cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)
121
+
122
+ # save cam file
123
+ Image.fromarray(cam_image).save(args.cam_file)
124
+
125
+
126
+ if __name__ == '__main__':
127
+ main()
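The SemanticSegmentationTarget wrapper above reduces the segmentation logits to a single scalar that Grad-CAM can differentiate: the logits of the chosen category are upsampled to the image size and summed over the predicted mask of that category. A self-contained sketch of that reduction, with random tensors standing in for a real model output (the class count and shapes below are illustrative only):

import numpy as np
import torch
import torch.nn.functional as F

# Illustrative shapes: 19 classes, 64x64 logits, 256x256 image.
logits = torch.randn(19, 64, 64)                    # stand-in for the model output
mask = np.float32(np.random.rand(256, 256) > 0.5)   # stand-in for pred_sem_seg == category
category, size = 7, (256, 256)

# Same reduction as SemanticSegmentationTarget.__call__:
up = F.interpolate(logits[None], size=size, mode='bilinear').squeeze(0)
score = (up[category] * torch.from_numpy(mask)).sum()
print(score)  # the scalar Grad-CAM back-propagates from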
tools/dataset_converters/chase_db1.py ADDED
@@ -0,0 +1,89 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import os
4
+ import os.path as osp
5
+ import tempfile
6
+ import zipfile
7
+
8
+ import mmcv
9
+ from mmengine.utils import mkdir_or_exist
10
+
11
+ CHASE_DB1_LEN = 28 * 3
12
+ TRAINING_LEN = 60
13
+
14
+
15
+ def parse_args():
16
+ parser = argparse.ArgumentParser(
17
+ description='Convert CHASE_DB1 dataset to mmsegmentation format')
18
+ parser.add_argument('dataset_path', help='path of CHASEDB1.zip')
19
+ parser.add_argument('--tmp_dir', help='path of the temporary directory')
20
+ parser.add_argument('-o', '--out_dir', help='output path')
21
+ args = parser.parse_args()
22
+ return args
23
+
24
+
25
+ def main():
26
+ args = parse_args()
27
+ dataset_path = args.dataset_path
28
+ if args.out_dir is None:
29
+ out_dir = osp.join('data', 'CHASE_DB1')
30
+ else:
31
+ out_dir = args.out_dir
32
+
33
+ print('Making directories...')
34
+ mkdir_or_exist(out_dir)
35
+ mkdir_or_exist(osp.join(out_dir, 'images'))
36
+ mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
37
+ mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
38
+ mkdir_or_exist(osp.join(out_dir, 'annotations'))
39
+ mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
40
+ mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
41
+
42
+ with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
43
+ print('Extracting CHASEDB1.zip...')
44
+ zip_file = zipfile.ZipFile(dataset_path)
45
+ zip_file.extractall(tmp_dir)
46
+
47
+ print('Generating training dataset...')
48
+
49
+ assert len(os.listdir(tmp_dir)) == CHASE_DB1_LEN, \
50
+ f'len(os.listdir(tmp_dir)) != {CHASE_DB1_LEN}'
51
+
52
+ for img_name in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
53
+ img = mmcv.imread(osp.join(tmp_dir, img_name))
54
+ if osp.splitext(img_name)[1] == '.jpg':
55
+ mmcv.imwrite(
56
+ img,
57
+ osp.join(out_dir, 'images', 'training',
58
+ osp.splitext(img_name)[0] + '.png'))
59
+ else:
60
+ # The annotation img should be divided by 128, because some of
61
+ # the annotation imgs are not standard. We should set a
62
+ # threshold to convert the nonstandard annotation imgs. The
63
+ # value divided by 128 is equivalent to '1 if value >= 128
64
+ # else 0'
65
+ mmcv.imwrite(
66
+ img[:, :, 0] // 128,
67
+ osp.join(out_dir, 'annotations', 'training',
68
+ osp.splitext(img_name)[0] + '.png'))
69
+
70
+ for img_name in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
71
+ img = mmcv.imread(osp.join(tmp_dir, img_name))
72
+ if osp.splitext(img_name)[1] == '.jpg':
73
+ mmcv.imwrite(
74
+ img,
75
+ osp.join(out_dir, 'images', 'validation',
76
+ osp.splitext(img_name)[0] + '.png'))
77
+ else:
78
+ mmcv.imwrite(
79
+ img[:, :, 0] // 128,
80
+ osp.join(out_dir, 'annotations', 'validation',
81
+ osp.splitext(img_name)[0] + '.png'))
82
+
83
+ print('Removing the temporary files...')
84
+
85
+ print('Done!')
86
+
87
+
88
+ if __name__ == '__main__':
89
+ main()
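The integer division by 128 applied to the annotation images is just a threshold on uint8 data: value // 128 is 1 exactly when value >= 128 and 0 otherwise, which binarizes the slightly non-standard CHASE_DB1 masks. A quick check on made-up pixel values:

import numpy as np

values = np.array([0, 1, 127, 128, 200, 255], dtype=np.uint8)
by_division = values // 128
by_threshold = (values >= 128).astype(np.uint8)
assert np.array_equal(by_division, by_threshold)
print(by_division)  # [0 0 0 1 1 1]

The same trick is reused by the HRF and STARE converters further below.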
tools/dataset_converters/cityscapes.py ADDED
@@ -0,0 +1,56 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import os.path as osp
4
+
5
+ from cityscapesscripts.preparation.json2labelImg import json2labelImg
6
+ from mmengine.utils import (mkdir_or_exist, scandir, track_parallel_progress,
7
+ track_progress)
8
+
9
+
10
+ def convert_json_to_label(json_file):
11
+ label_file = json_file.replace('_polygons.json', '_labelTrainIds.png')
12
+ json2labelImg(json_file, label_file, 'trainIds')
13
+
14
+
15
+ def parse_args():
16
+ parser = argparse.ArgumentParser(
17
+ description='Convert Cityscapes annotations to TrainIds')
18
+ parser.add_argument('cityscapes_path', help='cityscapes data path')
19
+ parser.add_argument('--gt-dir', default='gtFine', type=str)
20
+ parser.add_argument('-o', '--out-dir', help='output path')
21
+ parser.add_argument(
22
+ '--nproc', default=1, type=int, help='number of process')
23
+ args = parser.parse_args()
24
+ return args
25
+
26
+
27
+ def main():
28
+ args = parse_args()
29
+ cityscapes_path = args.cityscapes_path
30
+ out_dir = args.out_dir if args.out_dir else cityscapes_path
31
+ mkdir_or_exist(out_dir)
32
+
33
+ gt_dir = osp.join(cityscapes_path, args.gt_dir)
34
+
35
+ poly_files = []
36
+ for poly in scandir(gt_dir, '_polygons.json', recursive=True):
37
+ poly_file = osp.join(gt_dir, poly)
38
+ poly_files.append(poly_file)
39
+ if args.nproc > 1:
40
+ track_parallel_progress(convert_json_to_label, poly_files, args.nproc)
41
+ else:
42
+ track_progress(convert_json_to_label, poly_files)
43
+
44
+ split_names = ['train', 'val', 'test']
45
+
46
+ for split in split_names:
47
+ filenames = []
48
+ for poly in scandir(
49
+ osp.join(gt_dir, split), '_polygons.json', recursive=True):
50
+ filenames.append(poly.replace('_gtFine_polygons.json', ''))
51
+ with open(osp.join(out_dir, f'{split}.txt'), 'w') as f:
52
+ f.writelines(f + '\n' for f in filenames)
53
+
54
+
55
+ if __name__ == '__main__':
56
+ main()
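The split files written at the end list image stems obtained by stripping the annotation suffix, while convert_json_to_label derives the label filename from the polygon filename. A quick illustration of both string transforms on a typical Cityscapes annotation name (the city and frame id here are only an example):

poly = 'aachen/aachen_000000_000019_gtFine_polygons.json'
label_file = poly.replace('_polygons.json', '_labelTrainIds.png')
stem = poly.replace('_gtFine_polygons.json', '')
print(label_file)  # aachen/aachen_000000_000019_gtFine_labelTrainIds.png
print(stem)        # aachen/aachen_000000_000019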
tools/dataset_converters/coco_stuff10k.py ADDED
@@ -0,0 +1,308 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import os.path as osp
4
+ import shutil
5
+ from functools import partial
6
+
7
+ import numpy as np
8
+ from mmengine.utils import (mkdir_or_exist, track_parallel_progress,
9
+ track_progress)
10
+ from PIL import Image
11
+ from scipy.io import loadmat
12
+
13
+ COCO_LEN = 10000
14
+
15
+ clsID_to_trID = {
16
+ 0: 0,
17
+ 1: 1,
18
+ 2: 2,
19
+ 3: 3,
20
+ 4: 4,
21
+ 5: 5,
22
+ 6: 6,
23
+ 7: 7,
24
+ 8: 8,
25
+ 9: 9,
26
+ 10: 10,
27
+ 11: 11,
28
+ 13: 12,
29
+ 14: 13,
30
+ 15: 14,
31
+ 16: 15,
32
+ 17: 16,
33
+ 18: 17,
34
+ 19: 18,
35
+ 20: 19,
36
+ 21: 20,
37
+ 22: 21,
38
+ 23: 22,
39
+ 24: 23,
40
+ 25: 24,
41
+ 27: 25,
42
+ 28: 26,
43
+ 31: 27,
44
+ 32: 28,
45
+ 33: 29,
46
+ 34: 30,
47
+ 35: 31,
48
+ 36: 32,
49
+ 37: 33,
50
+ 38: 34,
51
+ 39: 35,
52
+ 40: 36,
53
+ 41: 37,
54
+ 42: 38,
55
+ 43: 39,
56
+ 44: 40,
57
+ 46: 41,
58
+ 47: 42,
59
+ 48: 43,
60
+ 49: 44,
61
+ 50: 45,
62
+ 51: 46,
63
+ 52: 47,
64
+ 53: 48,
65
+ 54: 49,
66
+ 55: 50,
67
+ 56: 51,
68
+ 57: 52,
69
+ 58: 53,
70
+ 59: 54,
71
+ 60: 55,
72
+ 61: 56,
73
+ 62: 57,
74
+ 63: 58,
75
+ 64: 59,
76
+ 65: 60,
77
+ 67: 61,
78
+ 70: 62,
79
+ 72: 63,
80
+ 73: 64,
81
+ 74: 65,
82
+ 75: 66,
83
+ 76: 67,
84
+ 77: 68,
85
+ 78: 69,
86
+ 79: 70,
87
+ 80: 71,
88
+ 81: 72,
89
+ 82: 73,
90
+ 84: 74,
91
+ 85: 75,
92
+ 86: 76,
93
+ 87: 77,
94
+ 88: 78,
95
+ 89: 79,
96
+ 90: 80,
97
+ 92: 81,
98
+ 93: 82,
99
+ 94: 83,
100
+ 95: 84,
101
+ 96: 85,
102
+ 97: 86,
103
+ 98: 87,
104
+ 99: 88,
105
+ 100: 89,
106
+ 101: 90,
107
+ 102: 91,
108
+ 103: 92,
109
+ 104: 93,
110
+ 105: 94,
111
+ 106: 95,
112
+ 107: 96,
113
+ 108: 97,
114
+ 109: 98,
115
+ 110: 99,
116
+ 111: 100,
117
+ 112: 101,
118
+ 113: 102,
119
+ 114: 103,
120
+ 115: 104,
121
+ 116: 105,
122
+ 117: 106,
123
+ 118: 107,
124
+ 119: 108,
125
+ 120: 109,
126
+ 121: 110,
127
+ 122: 111,
128
+ 123: 112,
129
+ 124: 113,
130
+ 125: 114,
131
+ 126: 115,
132
+ 127: 116,
133
+ 128: 117,
134
+ 129: 118,
135
+ 130: 119,
136
+ 131: 120,
137
+ 132: 121,
138
+ 133: 122,
139
+ 134: 123,
140
+ 135: 124,
141
+ 136: 125,
142
+ 137: 126,
143
+ 138: 127,
144
+ 139: 128,
145
+ 140: 129,
146
+ 141: 130,
147
+ 142: 131,
148
+ 143: 132,
149
+ 144: 133,
150
+ 145: 134,
151
+ 146: 135,
152
+ 147: 136,
153
+ 148: 137,
154
+ 149: 138,
155
+ 150: 139,
156
+ 151: 140,
157
+ 152: 141,
158
+ 153: 142,
159
+ 154: 143,
160
+ 155: 144,
161
+ 156: 145,
162
+ 157: 146,
163
+ 158: 147,
164
+ 159: 148,
165
+ 160: 149,
166
+ 161: 150,
167
+ 162: 151,
168
+ 163: 152,
169
+ 164: 153,
170
+ 165: 154,
171
+ 166: 155,
172
+ 167: 156,
173
+ 168: 157,
174
+ 169: 158,
175
+ 170: 159,
176
+ 171: 160,
177
+ 172: 161,
178
+ 173: 162,
179
+ 174: 163,
180
+ 175: 164,
181
+ 176: 165,
182
+ 177: 166,
183
+ 178: 167,
184
+ 179: 168,
185
+ 180: 169,
186
+ 181: 170,
187
+ 182: 171
188
+ }
189
+
190
+
191
+ def convert_to_trainID(tuple_path, in_img_dir, in_ann_dir, out_img_dir,
192
+ out_mask_dir, is_train):
193
+ imgpath, maskpath = tuple_path
194
+ shutil.copyfile(
195
+ osp.join(in_img_dir, imgpath),
196
+ osp.join(out_img_dir, 'train2014', imgpath) if is_train else osp.join(
197
+ out_img_dir, 'test2014', imgpath))
198
+ annotate = loadmat(osp.join(in_ann_dir, maskpath))
199
+ mask = annotate['S'].astype(np.uint8)
200
+ mask_copy = mask.copy()
201
+ for clsID, trID in clsID_to_trID.items():
202
+ mask_copy[mask == clsID] = trID
203
+ seg_filename = osp.join(out_mask_dir, 'train2014',
204
+ maskpath.split('.')[0] +
205
+ '_labelTrainIds.png') if is_train else osp.join(
206
+ out_mask_dir, 'test2014',
207
+ maskpath.split('.')[0] + '_labelTrainIds.png')
208
+ Image.fromarray(mask_copy).save(seg_filename, 'PNG')
209
+
210
+
211
+ def generate_coco_list(folder):
212
+ train_list = osp.join(folder, 'imageLists', 'train.txt')
213
+ test_list = osp.join(folder, 'imageLists', 'test.txt')
214
+ train_paths = []
215
+ test_paths = []
216
+
217
+ with open(train_list) as f:
218
+ for filename in f:
219
+ basename = filename.strip()
220
+ imgpath = basename + '.jpg'
221
+ maskpath = basename + '.mat'
222
+ train_paths.append((imgpath, maskpath))
223
+
224
+ with open(test_list) as f:
225
+ for filename in f:
226
+ basename = filename.strip()
227
+ imgpath = basename + '.jpg'
228
+ maskpath = basename + '.mat'
229
+ test_paths.append((imgpath, maskpath))
230
+
231
+ return train_paths, test_paths
232
+
233
+
234
+ def parse_args():
235
+ parser = argparse.ArgumentParser(
236
+ description=\
237
+ 'Convert COCO Stuff 10k annotations to mmsegmentation format') # noqa
238
+ parser.add_argument('coco_path', help='coco stuff path')
239
+ parser.add_argument('-o', '--out_dir', help='output path')
240
+ parser.add_argument(
241
+ '--nproc', default=16, type=int, help='number of process')
242
+ args = parser.parse_args()
243
+ return args
244
+
245
+
246
+ def main():
247
+ args = parse_args()
248
+ coco_path = args.coco_path
249
+ nproc = args.nproc
250
+
251
+ out_dir = args.out_dir or coco_path
252
+ out_img_dir = osp.join(out_dir, 'images')
253
+ out_mask_dir = osp.join(out_dir, 'annotations')
254
+
255
+ mkdir_or_exist(osp.join(out_img_dir, 'train2014'))
256
+ mkdir_or_exist(osp.join(out_img_dir, 'test2014'))
257
+ mkdir_or_exist(osp.join(out_mask_dir, 'train2014'))
258
+ mkdir_or_exist(osp.join(out_mask_dir, 'test2014'))
259
+
260
+ train_list, test_list = generate_coco_list(coco_path)
261
+ assert (len(train_list) +
262
+ len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format(
263
+ len(train_list), len(test_list))
264
+
265
+ if args.nproc > 1:
266
+ track_parallel_progress(
267
+ partial(
268
+ convert_to_trainID,
269
+ in_img_dir=osp.join(coco_path, 'images'),
270
+ in_ann_dir=osp.join(coco_path, 'annotations'),
271
+ out_img_dir=out_img_dir,
272
+ out_mask_dir=out_mask_dir,
273
+ is_train=True),
274
+ train_list,
275
+ nproc=nproc)
276
+ track_parallel_progress(
277
+ partial(
278
+ convert_to_trainID,
279
+ in_img_dir=osp.join(coco_path, 'images'),
280
+ in_ann_dir=osp.join(coco_path, 'annotations'),
281
+ out_img_dir=out_img_dir,
282
+ out_mask_dir=out_mask_dir,
283
+ is_train=False),
284
+ test_list,
285
+ nproc=nproc)
286
+ else:
287
+ track_progress(
288
+ partial(
289
+ convert_to_trainID,
290
+ in_img_dir=osp.join(coco_path, 'images'),
291
+ in_ann_dir=osp.join(coco_path, 'annotations'),
292
+ out_img_dir=out_img_dir,
293
+ out_mask_dir=out_mask_dir,
294
+ is_train=True), train_list)
295
+ track_progress(
296
+ partial(
297
+ convert_to_trainID,
298
+ in_img_dir=osp.join(coco_path, 'images'),
299
+ in_ann_dir=osp.join(coco_path, 'annotations'),
300
+ out_img_dir=out_img_dir,
301
+ out_mask_dir=out_mask_dir,
302
+ is_train=False), test_list)
303
+
304
+ print('Done!')
305
+
306
+
307
+ if __name__ == '__main__':
308
+ main()
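convert_to_trainID remaps every original COCO-Stuff class id to a contiguous train id with one boolean-mask assignment per class. A small sketch with a toy mapping (the real table above has about 170 entries) that also shows an equivalent single-pass lookup-table formulation:

import numpy as np

cls_to_tr = {0: 0, 1: 1, 2: 2, 13: 12}               # toy subset of clsID_to_trID
mask = np.array([[0, 1], [2, 13]], dtype=np.uint8)

# Loop-based remap, as in convert_to_trainID:
remapped = mask.copy()
for cls_id, tr_id in cls_to_tr.items():
    remapped[mask == cls_id] = tr_id

# Equivalent lookup table: one fancy-indexing pass over the whole mask.
lut = np.arange(256, dtype=np.uint8)
for cls_id, tr_id in cls_to_tr.items():
    lut[cls_id] = tr_id
assert np.array_equal(lut[mask], remapped)

The 164k converter below uses the same per-class loop, only with 255 kept as the ignore index.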
tools/dataset_converters/coco_stuff164k.py ADDED
@@ -0,0 +1,265 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import os.path as osp
4
+ import shutil
5
+ from functools import partial
6
+ from glob import glob
7
+
8
+ import numpy as np
9
+ from mmengine.utils import (mkdir_or_exist, track_parallel_progress,
10
+ track_progress)
11
+ from PIL import Image
12
+
13
+ COCO_LEN = 123287
14
+
15
+ clsID_to_trID = {
16
+ 0: 0,
17
+ 1: 1,
18
+ 2: 2,
19
+ 3: 3,
20
+ 4: 4,
21
+ 5: 5,
22
+ 6: 6,
23
+ 7: 7,
24
+ 8: 8,
25
+ 9: 9,
26
+ 10: 10,
27
+ 12: 11,
28
+ 13: 12,
29
+ 14: 13,
30
+ 15: 14,
31
+ 16: 15,
32
+ 17: 16,
33
+ 18: 17,
34
+ 19: 18,
35
+ 20: 19,
36
+ 21: 20,
37
+ 22: 21,
38
+ 23: 22,
39
+ 24: 23,
40
+ 26: 24,
41
+ 27: 25,
42
+ 30: 26,
43
+ 31: 27,
44
+ 32: 28,
45
+ 33: 29,
46
+ 34: 30,
47
+ 35: 31,
48
+ 36: 32,
49
+ 37: 33,
50
+ 38: 34,
51
+ 39: 35,
52
+ 40: 36,
53
+ 41: 37,
54
+ 42: 38,
55
+ 43: 39,
56
+ 45: 40,
57
+ 46: 41,
58
+ 47: 42,
59
+ 48: 43,
60
+ 49: 44,
61
+ 50: 45,
62
+ 51: 46,
63
+ 52: 47,
64
+ 53: 48,
65
+ 54: 49,
66
+ 55: 50,
67
+ 56: 51,
68
+ 57: 52,
69
+ 58: 53,
70
+ 59: 54,
71
+ 60: 55,
72
+ 61: 56,
73
+ 62: 57,
74
+ 63: 58,
75
+ 64: 59,
76
+ 66: 60,
77
+ 69: 61,
78
+ 71: 62,
79
+ 72: 63,
80
+ 73: 64,
81
+ 74: 65,
82
+ 75: 66,
83
+ 76: 67,
84
+ 77: 68,
85
+ 78: 69,
86
+ 79: 70,
87
+ 80: 71,
88
+ 81: 72,
89
+ 83: 73,
90
+ 84: 74,
91
+ 85: 75,
92
+ 86: 76,
93
+ 87: 77,
94
+ 88: 78,
95
+ 89: 79,
96
+ 91: 80,
97
+ 92: 81,
98
+ 93: 82,
99
+ 94: 83,
100
+ 95: 84,
101
+ 96: 85,
102
+ 97: 86,
103
+ 98: 87,
104
+ 99: 88,
105
+ 100: 89,
106
+ 101: 90,
107
+ 102: 91,
108
+ 103: 92,
109
+ 104: 93,
110
+ 105: 94,
111
+ 106: 95,
112
+ 107: 96,
113
+ 108: 97,
114
+ 109: 98,
115
+ 110: 99,
116
+ 111: 100,
117
+ 112: 101,
118
+ 113: 102,
119
+ 114: 103,
120
+ 115: 104,
121
+ 116: 105,
122
+ 117: 106,
123
+ 118: 107,
124
+ 119: 108,
125
+ 120: 109,
126
+ 121: 110,
127
+ 122: 111,
128
+ 123: 112,
129
+ 124: 113,
130
+ 125: 114,
131
+ 126: 115,
132
+ 127: 116,
133
+ 128: 117,
134
+ 129: 118,
135
+ 130: 119,
136
+ 131: 120,
137
+ 132: 121,
138
+ 133: 122,
139
+ 134: 123,
140
+ 135: 124,
141
+ 136: 125,
142
+ 137: 126,
143
+ 138: 127,
144
+ 139: 128,
145
+ 140: 129,
146
+ 141: 130,
147
+ 142: 131,
148
+ 143: 132,
149
+ 144: 133,
150
+ 145: 134,
151
+ 146: 135,
152
+ 147: 136,
153
+ 148: 137,
154
+ 149: 138,
155
+ 150: 139,
156
+ 151: 140,
157
+ 152: 141,
158
+ 153: 142,
159
+ 154: 143,
160
+ 155: 144,
161
+ 156: 145,
162
+ 157: 146,
163
+ 158: 147,
164
+ 159: 148,
165
+ 160: 149,
166
+ 161: 150,
167
+ 162: 151,
168
+ 163: 152,
169
+ 164: 153,
170
+ 165: 154,
171
+ 166: 155,
172
+ 167: 156,
173
+ 168: 157,
174
+ 169: 158,
175
+ 170: 159,
176
+ 171: 160,
177
+ 172: 161,
178
+ 173: 162,
179
+ 174: 163,
180
+ 175: 164,
181
+ 176: 165,
182
+ 177: 166,
183
+ 178: 167,
184
+ 179: 168,
185
+ 180: 169,
186
+ 181: 170,
187
+ 255: 255
188
+ }
189
+
190
+
191
+ def convert_to_trainID(maskpath, out_mask_dir, is_train):
192
+ mask = np.array(Image.open(maskpath))
193
+ mask_copy = mask.copy()
194
+ for clsID, trID in clsID_to_trID.items():
195
+ mask_copy[mask == clsID] = trID
196
+ seg_filename = osp.join(
197
+ out_mask_dir, 'train2017',
198
+ osp.basename(maskpath).split('.')[0] +
199
+ '_labelTrainIds.png') if is_train else osp.join(
200
+ out_mask_dir, 'val2017',
201
+ osp.basename(maskpath).split('.')[0] + '_labelTrainIds.png')
202
+ Image.fromarray(mask_copy).save(seg_filename, 'PNG')
203
+
204
+
205
+ def parse_args():
206
+ parser = argparse.ArgumentParser(
207
+ description=\
208
+ 'Convert COCO Stuff 164k annotations to mmsegmentation format') # noqa
209
+ parser.add_argument('coco_path', help='coco stuff path')
210
+ parser.add_argument('-o', '--out_dir', help='output path')
211
+ parser.add_argument(
212
+ '--nproc', default=16, type=int, help='number of process')
213
+ args = parser.parse_args()
214
+ return args
215
+
216
+
217
+ def main():
218
+ args = parse_args()
219
+ coco_path = args.coco_path
220
+ nproc = args.nproc
221
+
222
+ out_dir = args.out_dir or coco_path
223
+ out_img_dir = osp.join(out_dir, 'images')
224
+ out_mask_dir = osp.join(out_dir, 'annotations')
225
+
226
+ mkdir_or_exist(osp.join(out_mask_dir, 'train2017'))
227
+ mkdir_or_exist(osp.join(out_mask_dir, 'val2017'))
228
+
229
+ if out_dir != coco_path:
230
+ shutil.copytree(osp.join(coco_path, 'images'), out_img_dir)
231
+
232
+ train_list = glob(osp.join(coco_path, 'annotations', 'train2017', '*.png'))
233
+ train_list = [file for file in train_list if '_labelTrainIds' not in file]
234
+ test_list = glob(osp.join(coco_path, 'annotations', 'val2017', '*.png'))
235
+ test_list = [file for file in test_list if '_labelTrainIds' not in file]
236
+ assert (len(train_list) +
237
+ len(test_list)) == COCO_LEN, 'Wrong length of list {} & {}'.format(
238
+ len(train_list), len(test_list))
239
+
240
+ if args.nproc > 1:
241
+ track_parallel_progress(
242
+ partial(
243
+ convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True),
244
+ train_list,
245
+ nproc=nproc)
246
+ track_parallel_progress(
247
+ partial(
248
+ convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False),
249
+ test_list,
250
+ nproc=nproc)
251
+ else:
252
+ track_progress(
253
+ partial(
254
+ convert_to_trainID, out_mask_dir=out_mask_dir, is_train=True),
255
+ train_list)
256
+ track_progress(
257
+ partial(
258
+ convert_to_trainID, out_mask_dir=out_mask_dir, is_train=False),
259
+ test_list)
260
+
261
+ print('Done!')
262
+
263
+
264
+ if __name__ == '__main__':
265
+ main()
tools/dataset_converters/hrf.py ADDED
@@ -0,0 +1,112 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import os
4
+ import os.path as osp
5
+ import tempfile
6
+ import zipfile
7
+
8
+ import mmcv
9
+ from mmengine.utils import mkdir_or_exist
10
+
11
+ HRF_LEN = 15
12
+ TRAINING_LEN = 5
13
+
14
+
15
+ def parse_args():
16
+ parser = argparse.ArgumentParser(
17
+ description='Convert HRF dataset to mmsegmentation format')
18
+ parser.add_argument('healthy_path', help='the path of healthy.zip')
19
+ parser.add_argument(
20
+ 'healthy_manualsegm_path', help='the path of healthy_manualsegm.zip')
21
+ parser.add_argument('glaucoma_path', help='the path of glaucoma.zip')
22
+ parser.add_argument(
23
+ 'glaucoma_manualsegm_path', help='the path of glaucoma_manualsegm.zip')
24
+ parser.add_argument(
25
+ 'diabetic_retinopathy_path',
26
+ help='the path of diabetic_retinopathy.zip')
27
+ parser.add_argument(
28
+ 'diabetic_retinopathy_manualsegm_path',
29
+ help='the path of diabetic_retinopathy_manualsegm.zip')
30
+ parser.add_argument('--tmp_dir', help='path of the temporary directory')
31
+ parser.add_argument('-o', '--out_dir', help='output path')
32
+ args = parser.parse_args()
33
+ return args
34
+
35
+
36
+ def main():
37
+ args = parse_args()
38
+ images_path = [
39
+ args.healthy_path, args.glaucoma_path, args.diabetic_retinopathy_path
40
+ ]
41
+ annotations_path = [
42
+ args.healthy_manualsegm_path, args.glaucoma_manualsegm_path,
43
+ args.diabetic_retinopathy_manualsegm_path
44
+ ]
45
+ if args.out_dir is None:
46
+ out_dir = osp.join('data', 'HRF')
47
+ else:
48
+ out_dir = args.out_dir
49
+
50
+ print('Making directories...')
51
+ mkdir_or_exist(out_dir)
52
+ mkdir_or_exist(osp.join(out_dir, 'images'))
53
+ mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
54
+ mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
55
+ mkdir_or_exist(osp.join(out_dir, 'annotations'))
56
+ mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
57
+ mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
58
+
59
+ print('Generating images...')
60
+ for now_path in images_path:
61
+ with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
62
+ zip_file = zipfile.ZipFile(now_path)
63
+ zip_file.extractall(tmp_dir)
64
+
65
+ assert len(os.listdir(tmp_dir)) == HRF_LEN, \
66
+ f'len(os.listdir(tmp_dir)) != {HRF_LEN}'
67
+
68
+ for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
69
+ img = mmcv.imread(osp.join(tmp_dir, filename))
70
+ mmcv.imwrite(
71
+ img,
72
+ osp.join(out_dir, 'images', 'training',
73
+ osp.splitext(filename)[0] + '.png'))
74
+ for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
75
+ img = mmcv.imread(osp.join(tmp_dir, filename))
76
+ mmcv.imwrite(
77
+ img,
78
+ osp.join(out_dir, 'images', 'validation',
79
+ osp.splitext(filename)[0] + '.png'))
80
+
81
+ print('Generating annotations...')
82
+ for now_path in annotations_path:
83
+ with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
84
+ zip_file = zipfile.ZipFile(now_path)
85
+ zip_file.extractall(tmp_dir)
86
+
87
+ assert len(os.listdir(tmp_dir)) == HRF_LEN, \
88
+ f'len(os.listdir(tmp_dir)) != {HRF_LEN}'
89
+
90
+ for filename in sorted(os.listdir(tmp_dir))[:TRAINING_LEN]:
91
+ img = mmcv.imread(osp.join(tmp_dir, filename))
92
+ # The annotation img should be divided by 128, because some of
93
+ # the annotation imgs are not standard. We should set a
94
+ # threshold to convert the nonstandard annotation imgs. The
95
+ # value divided by 128 is equivalent to '1 if value >= 128
96
+ # else 0'
97
+ mmcv.imwrite(
98
+ img[:, :, 0] // 128,
99
+ osp.join(out_dir, 'annotations', 'training',
100
+ osp.splitext(filename)[0] + '.png'))
101
+ for filename in sorted(os.listdir(tmp_dir))[TRAINING_LEN:]:
102
+ img = mmcv.imread(osp.join(tmp_dir, filename))
103
+ mmcv.imwrite(
104
+ img[:, :, 0] // 128,
105
+ osp.join(out_dir, 'annotations', 'validation',
106
+ osp.splitext(filename)[0] + '.png'))
107
+
108
+ print('Done!')
109
+
110
+
111
+ if __name__ == '__main__':
112
+ main()
tools/dataset_converters/isaid.py ADDED
@@ -0,0 +1,246 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import glob
4
+ import os
5
+ import os.path as osp
6
+ import shutil
7
+ import tempfile
8
+ import zipfile
9
+
10
+ import mmcv
11
+ import numpy as np
12
+ from mmengine.utils import ProgressBar, mkdir_or_exist
13
+ from PIL import Image
14
+
15
+ iSAID_palette = \
16
+ {
17
+ 0: (0, 0, 0),
18
+ 1: (0, 0, 63),
19
+ 2: (0, 63, 63),
20
+ 3: (0, 63, 0),
21
+ 4: (0, 63, 127),
22
+ 5: (0, 63, 191),
23
+ 6: (0, 63, 255),
24
+ 7: (0, 127, 63),
25
+ 8: (0, 127, 127),
26
+ 9: (0, 0, 127),
27
+ 10: (0, 0, 191),
28
+ 11: (0, 0, 255),
29
+ 12: (0, 191, 127),
30
+ 13: (0, 127, 191),
31
+ 14: (0, 127, 255),
32
+ 15: (0, 100, 155)
33
+ }
34
+
35
+ iSAID_invert_palette = {v: k for k, v in iSAID_palette.items()}
36
+
37
+
38
+ def iSAID_convert_from_color(arr_3d, palette=iSAID_invert_palette):
39
+ """RGB-color encoding to grayscale labels."""
40
+ arr_2d = np.zeros((arr_3d.shape[0], arr_3d.shape[1]), dtype=np.uint8)
41
+
42
+ for c, i in palette.items():
43
+ m = np.all(arr_3d == np.array(c).reshape(1, 1, 3), axis=2)
44
+ arr_2d[m] = i
45
+
46
+ return arr_2d
47
+
48
+
49
+ def slide_crop_image(src_path, out_dir, mode, patch_H, patch_W, overlap):
50
+ img = np.asarray(Image.open(src_path).convert('RGB'))
51
+
52
+ img_H, img_W, _ = img.shape
53
+
54
+ if img_H < patch_H and img_W > patch_W:
55
+
56
+ img = mmcv.impad(img, shape=(patch_H, img_W), pad_val=0)
57
+
58
+ img_H, img_W, _ = img.shape
59
+
60
+ elif img_H > patch_H and img_W < patch_W:
61
+
62
+ img = mmcv.impad(img, shape=(img_H, patch_W), pad_val=0)
63
+
64
+ img_H, img_W, _ = img.shape
65
+
66
+ elif img_H < patch_H and img_W < patch_W:
67
+
68
+ img = mmcv.impad(img, shape=(patch_H, patch_W), pad_val=0)
69
+
70
+ img_H, img_W, _ = img.shape
71
+
72
+ for x in range(0, img_W, patch_W - overlap):
73
+ for y in range(0, img_H, patch_H - overlap):
74
+ x_str = x
75
+ x_end = x + patch_W
76
+ if x_end > img_W:
77
+ diff_x = x_end - img_W
78
+ x_str -= diff_x
79
+ x_end = img_W
80
+ y_str = y
81
+ y_end = y + patch_H
82
+ if y_end > img_H:
83
+ diff_y = y_end - img_H
84
+ y_str -= diff_y
85
+ y_end = img_H
86
+
87
+ img_patch = img[y_str:y_end, x_str:x_end, :]
88
+ img_patch = Image.fromarray(img_patch.astype(np.uint8))
89
+ image = osp.basename(src_path).split('.')[0] + '_' + str(
90
+ y_str) + '_' + str(y_end) + '_' + str(x_str) + '_' + str(
91
+ x_end) + '.png'
92
+ # print(image)
93
+ save_path_image = osp.join(out_dir, 'img_dir', mode, str(image))
94
+ img_patch.save(save_path_image, format='BMP')
95
+
96
+
97
+ def slide_crop_label(src_path, out_dir, mode, patch_H, patch_W, overlap):
98
+ label = mmcv.imread(src_path, channel_order='rgb')
99
+ label = iSAID_convert_from_color(label)
100
+ img_H, img_W = label.shape
101
+
102
+ if img_H < patch_H and img_W > patch_W:
103
+
104
+ label = mmcv.impad(label, shape=(patch_H, img_W), pad_val=255)
105
+
106
+ img_H = patch_H
107
+
108
+ elif img_H > patch_H and img_W < patch_W:
109
+
110
+ label = mmcv.impad(label, shape=(img_H, patch_W), pad_val=255)
111
+
112
+ img_W = patch_W
113
+
114
+ elif img_H < patch_H and img_W < patch_W:
115
+
116
+ label = mmcv.impad(label, shape=(patch_H, patch_W), pad_val=255)
117
+
118
+ img_H = patch_H
119
+ img_W = patch_W
120
+
121
+ for x in range(0, img_W, patch_W - overlap):
122
+ for y in range(0, img_H, patch_H - overlap):
123
+ x_str = x
124
+ x_end = x + patch_W
125
+ if x_end > img_W:
126
+ diff_x = x_end - img_W
127
+ x_str -= diff_x
128
+ x_end = img_W
129
+ y_str = y
130
+ y_end = y + patch_H
131
+ if y_end > img_H:
132
+ diff_y = y_end - img_H
133
+ y_str -= diff_y
134
+ y_end = img_H
135
+
136
+ lab_patch = label[y_str:y_end, x_str:x_end]
137
+ lab_patch = Image.fromarray(lab_patch.astype(np.uint8), mode='P')
138
+
139
+ image = osp.basename(src_path).split('.')[0].split(
140
+ '_')[0] + '_' + str(y_str) + '_' + str(y_end) + '_' + str(
141
+ x_str) + '_' + str(x_end) + '_instance_color_RGB' + '.png'
142
+ lab_patch.save(osp.join(out_dir, 'ann_dir', mode, str(image)))
143
+
144
+
145
+ def parse_args():
146
+ parser = argparse.ArgumentParser(
147
+ description='Convert iSAID dataset to mmsegmentation format')
148
+ parser.add_argument('dataset_path', help='iSAID folder path')
149
+ parser.add_argument('--tmp_dir', help='path of the temporary directory')
150
+ parser.add_argument('-o', '--out_dir', help='output path')
151
+
152
+ parser.add_argument(
153
+ '--patch_width',
154
+ default=896,
155
+ type=int,
156
+ help='Width of the cropped image patch')
157
+ parser.add_argument(
158
+ '--patch_height',
159
+ default=896,
160
+ type=int,
161
+ help='Height of the cropped image patch')
162
+ parser.add_argument(
163
+ '--overlap_area', default=384, type=int, help='Overlap area')
164
+ args = parser.parse_args()
165
+ return args
166
+
167
+
168
+ def main():
169
+ args = parse_args()
170
+ dataset_path = args.dataset_path
171
+ # image patch width and height
172
+ patch_H, patch_W = args.patch_width, args.patch_height
173
+
174
+ overlap = args.overlap_area # overlap area
175
+
176
+ if args.out_dir is None:
177
+ out_dir = osp.join('data', 'iSAID')
178
+ else:
179
+ out_dir = args.out_dir
180
+
181
+ print('Making directories...')
182
+ mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
183
+ mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
184
+ mkdir_or_exist(osp.join(out_dir, 'img_dir', 'test'))
185
+
186
+ mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
187
+ mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
188
+ mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'test'))
189
+
190
+ assert os.path.exists(os.path.join(dataset_path, 'train')), \
191
+ f'train is not in {dataset_path}'
192
+ assert os.path.exists(os.path.join(dataset_path, 'val')), \
193
+ f'val is not in {dataset_path}'
194
+ assert os.path.exists(os.path.join(dataset_path, 'test')), \
195
+ f'test is not in {dataset_path}'
196
+
197
+ with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
198
+ for dataset_mode in ['train', 'val', 'test']:
199
+
200
+ # for dataset_mode in [ 'test']:
201
+ print(f'Extracting {dataset_mode}ing.zip...')
202
+ img_zipp_list = glob.glob(
203
+ os.path.join(dataset_path, dataset_mode, 'images', '*.zip'))
204
+ print('Find the data', img_zipp_list)
205
+ for img_zipp in img_zipp_list:
206
+ zip_file = zipfile.ZipFile(img_zipp)
207
+ zip_file.extractall(os.path.join(tmp_dir, dataset_mode, 'img'))
208
+ src_path_list = glob.glob(
209
+ os.path.join(tmp_dir, dataset_mode, 'img', 'images', '*.png'))
210
+
211
+ src_prog_bar = ProgressBar(len(src_path_list))
212
+ for i, img_path in enumerate(src_path_list):
213
+ if dataset_mode != 'test':
214
+ slide_crop_image(img_path, out_dir, dataset_mode, patch_H,
215
+ patch_W, overlap)
216
+
217
+ else:
218
+ shutil.move(img_path,
219
+ os.path.join(out_dir, 'img_dir', dataset_mode))
220
+ src_prog_bar.update()
221
+
222
+ if dataset_mode != 'test':
223
+ label_zipp_list = glob.glob(
224
+ os.path.join(dataset_path, dataset_mode, 'Semantic_masks',
225
+ '*.zip'))
226
+ for label_zipp in label_zipp_list:
227
+ zip_file = zipfile.ZipFile(label_zipp)
228
+ zip_file.extractall(
229
+ os.path.join(tmp_dir, dataset_mode, 'lab'))
230
+
231
+ lab_path_list = glob.glob(
232
+ os.path.join(tmp_dir, dataset_mode, 'lab', 'images',
233
+ '*.png'))
234
+ lab_prog_bar = ProgressBar(len(lab_path_list))
235
+ for i, lab_path in enumerate(lab_path_list):
236
+ slide_crop_label(lab_path, out_dir, dataset_mode, patch_H,
237
+ patch_W, overlap)
238
+ lab_prog_bar.update()
239
+
240
+ print('Removing the temporary files...')
241
+
242
+ print('Done!')
243
+
244
+
245
+ if __name__ == '__main__':
246
+ main()
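iSAID_convert_from_color recovers grayscale class indices from the RGB palette by matching every pixel against each palette colour in turn. A minimal check on a hand-made 2x2 image, using only a subset of the palette:

import numpy as np

palette = {0: (0, 0, 0), 1: (0, 0, 63), 2: (0, 63, 63)}   # subset of iSAID_palette
invert = {v: k for k, v in palette.items()}

rgb = np.array([[(0, 0, 0), (0, 0, 63)],
                [(0, 63, 63), (0, 0, 0)]], dtype=np.uint8)

label = np.zeros(rgb.shape[:2], dtype=np.uint8)
for color, idx in invert.items():
    m = np.all(rgb == np.array(color).reshape(1, 1, 3), axis=2)
    label[m] = idx
print(label)  # [[0 1]
              #  [2 0]]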
tools/dataset_converters/levircd.py ADDED
@@ -0,0 +1,99 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import glob
4
+ import math
5
+ import os
6
+ import os.path as osp
7
+
8
+ import mmcv
9
+ import numpy as np
10
+ from mmengine.utils import ProgressBar
11
+
12
+
13
+ def parse_args():
14
+ parser = argparse.ArgumentParser(
15
+ description='Convert levir-cd dataset to mmsegmentation format')
16
+ parser.add_argument('--dataset_path', help='LEVIR-CD folder path')
17
+ parser.add_argument('-o', '--out_dir', help='output path')
18
+ parser.add_argument(
19
+ '--clip_size',
20
+ type=int,
21
+ help='clipped size of image after preparation',
22
+ default=256)
23
+ parser.add_argument(
24
+ '--stride_size',
25
+ type=int,
26
+ help='stride of clipping original images',
27
+ default=256)
28
+ args = parser.parse_args()
29
+ return args
30
+
31
+
32
+ def main():
33
+ args = parse_args()
34
+ input_folder = args.dataset_path
35
+ png_files = glob.glob(
36
+ os.path.join(input_folder, '**/*.png'), recursive=True)
37
+ output_folder = args.out_dir
38
+ prog_bar = ProgressBar(len(png_files))
39
+ for png_file in png_files:
40
+ new_path = os.path.join(
41
+ output_folder,
42
+ os.path.relpath(os.path.dirname(png_file), input_folder))
43
+ os.makedirs(os.path.dirname(new_path), exist_ok=True)
44
+ label = False
45
+ if 'label' in png_file:
46
+ label = True
47
+ clip_big_image(png_file, new_path, args, label)
48
+ prog_bar.update()
49
+
50
+
51
+ def clip_big_image(image_path, clip_save_dir, args, to_label=False):
52
+ image = mmcv.imread(image_path)
53
+
54
+ h, w, c = image.shape
55
+ clip_size = args.clip_size
56
+ stride_size = args.stride_size
57
+
58
+ num_rows = math.ceil((h - clip_size) / stride_size) if math.ceil(
59
+ (h - clip_size) /
60
+ stride_size) * stride_size + clip_size >= h else math.ceil(
61
+ (h - clip_size) / stride_size) + 1
62
+ num_cols = math.ceil((w - clip_size) / stride_size) if math.ceil(
63
+ (w - clip_size) /
64
+ stride_size) * stride_size + clip_size >= w else math.ceil(
65
+ (w - clip_size) / stride_size) + 1
66
+
67
+ x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1))
68
+ xmin = x * clip_size
69
+ ymin = y * clip_size
70
+
71
+ xmin = xmin.ravel()
72
+ ymin = ymin.ravel()
73
+ xmin_offset = np.where(xmin + clip_size > w, w - xmin - clip_size,
74
+ np.zeros_like(xmin))
75
+ ymin_offset = np.where(ymin + clip_size > h, h - ymin - clip_size,
76
+ np.zeros_like(ymin))
77
+ boxes = np.stack([
78
+ xmin + xmin_offset, ymin + ymin_offset,
79
+ np.minimum(xmin + clip_size, w),
80
+ np.minimum(ymin + clip_size, h)
81
+ ],
82
+ axis=1)
83
+
84
+ if to_label:
85
+ image[image == 255] = 1
86
+ image = image[:, :, 0]
87
+ for box in boxes:
88
+ start_x, start_y, end_x, end_y = box
89
+ clipped_image = image[start_y:end_y, start_x:end_x] \
90
+ if to_label else image[start_y:end_y, start_x:end_x, :]
91
+ idx = osp.basename(image_path).split('.')[0]
92
+ mmcv.imwrite(
93
+ clipped_image.astype(np.uint8),
94
+ osp.join(clip_save_dir,
95
+ f'{idx}_{start_x}_{start_y}_{end_x}_{end_y}.png'))
96
+
97
+
98
+ if __name__ == '__main__':
99
+ main()
tools/dataset_converters/loveda.py ADDED
@@ -0,0 +1,73 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import os
4
+ import os.path as osp
5
+ import shutil
6
+ import tempfile
7
+ import zipfile
8
+
9
+ from mmengine.utils import mkdir_or_exist
10
+
11
+
12
+ def parse_args():
13
+ parser = argparse.ArgumentParser(
14
+ description='Convert LoveDA dataset to mmsegmentation format')
15
+ parser.add_argument('dataset_path', help='LoveDA folder path')
16
+ parser.add_argument('--tmp_dir', help='path of the temporary directory')
17
+ parser.add_argument('-o', '--out_dir', help='output path')
18
+ args = parser.parse_args()
19
+ return args
20
+
21
+
22
+ def main():
23
+ args = parse_args()
24
+ dataset_path = args.dataset_path
25
+ if args.out_dir is None:
26
+ out_dir = osp.join('data', 'loveDA')
27
+ else:
28
+ out_dir = args.out_dir
29
+
30
+ print('Making directories...')
31
+ mkdir_or_exist(out_dir)
32
+ mkdir_or_exist(osp.join(out_dir, 'img_dir'))
33
+ mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
34
+ mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
35
+ mkdir_or_exist(osp.join(out_dir, 'img_dir', 'test'))
36
+ mkdir_or_exist(osp.join(out_dir, 'ann_dir'))
37
+ mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
38
+ mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
39
+
40
+ assert 'Train.zip' in os.listdir(dataset_path), \
41
+ f'Train.zip is not in {dataset_path}'
42
+ assert 'Val.zip' in os.listdir(dataset_path), \
43
+ f'Val.zip is not in {dataset_path}'
44
+ assert 'Test.zip' in os.listdir(dataset_path), \
45
+ f'Test.zip is not in {dataset_path}'
46
+
47
+ with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
48
+ for dataset in ['Train', 'Val', 'Test']:
49
+ zip_file = zipfile.ZipFile(
50
+ os.path.join(dataset_path, dataset + '.zip'))
51
+ zip_file.extractall(tmp_dir)
52
+ data_type = dataset.lower()
53
+ for location in ['Rural', 'Urban']:
54
+ for image_type in ['images_png', 'masks_png']:
55
+ if image_type == 'images_png':
56
+ dst = osp.join(out_dir, 'img_dir', data_type)
57
+ else:
58
+ dst = osp.join(out_dir, 'ann_dir', data_type)
59
+ if dataset == 'Test' and image_type == 'masks_png':
60
+ continue
61
+ else:
62
+ src_dir = osp.join(tmp_dir, dataset, location,
63
+ image_type)
64
+ src_lst = os.listdir(src_dir)
65
+ for file in src_lst:
66
+ shutil.move(osp.join(src_dir, file), dst)
67
+ print('Removing the temporary files...')
68
+
69
+ print('Done!')
70
+
71
+
72
+ if __name__ == '__main__':
73
+ main()
tools/dataset_converters/nyu.py ADDED
@@ -0,0 +1,89 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import os.path as osp
4
+ import shutil
5
+ import tempfile
6
+ import zipfile
7
+
8
+ from mmengine.utils import mkdir_or_exist
9
+
10
+
11
+ def parse_args():
12
+ parser = argparse.ArgumentParser(
13
+ description='Convert NYU Depth dataset to mmsegmentation format')
14
+ parser.add_argument('raw_data', help='the path of raw data')
15
+ parser.add_argument(
16
+ '-o', '--out_dir', help='output path', default='./data/nyu')
17
+ args = parser.parse_args()
18
+ return args
19
+
20
+
21
+ def reorganize(raw_data_dir: str, out_dir: str):
22
+ """Reorganize NYU Depth dataset files into the required directory
23
+ structure.
24
+
25
+ Args:
26
+ raw_data_dir (str): Path to the raw data directory.
27
+ out_dir (str): Output directory for the organized dataset.
28
+ """
29
+
30
+ def move_data(data_list, dst_prefix, fname_func):
31
+ """Move data files from source to destination directory.
32
+
33
+ Args:
34
+ data_list (list): List of data file paths.
35
+ dst_prefix (str): Prefix to be added to destination paths.
36
+ fname_func (callable): Function to process file names
37
+ """
38
+ for data_item in data_list:
39
+ data_item = data_item.strip().strip('/')
40
+ new_item = fname_func(data_item)
41
+ shutil.move(
42
+ osp.join(raw_data_dir, data_item),
43
+ osp.join(out_dir, dst_prefix, new_item))
44
+
45
+ def process_phase(phase):
46
+ """Process a dataset phase (e.g., 'train' or 'test')."""
47
+ with open(osp.join(raw_data_dir, f'nyu_{phase}.txt')) as f:
48
+ data = filter(lambda x: len(x.strip()) > 0, f.readlines())
49
+ data = map(lambda x: x.split()[:2], data)
50
+ images, annos = zip(*data)
51
+
52
+ move_data(images, f'images/{phase}',
53
+ lambda x: x.replace('/rgb', ''))
54
+ move_data(annos, f'annotations/{phase}',
55
+ lambda x: x.replace('/sync_depth', ''))
56
+
57
+ process_phase('train')
58
+ process_phase('test')
59
+
60
+
61
+ def main():
62
+ args = parse_args()
63
+
64
+ print('Making directories...')
65
+ mkdir_or_exist(args.out_dir)
66
+ for subdir in [
67
+ 'images/train', 'images/test', 'annotations/train',
68
+ 'annotations/test'
69
+ ]:
70
+ mkdir_or_exist(osp.join(args.out_dir, subdir))
71
+
72
+ print('Generating images and annotations...')
73
+
74
+ if args.raw_data.endswith('.zip'):
75
+ with tempfile.TemporaryDirectory() as tmp_dir:
76
+ zip_file = zipfile.ZipFile(args.raw_data)
77
+ zip_file.extractall(tmp_dir)
78
+ reorganize(osp.join(tmp_dir, 'nyu'), args.out_dir)
79
+ else:
80
+ assert osp.isdir(
81
+ args.raw_data
82
+ ), 'the raw_data argument should be either a zip file or a directory.'
83
+ reorganize(args.raw_data, args.out_dir)
84
+
85
+ print('Done!')
86
+
87
+
88
+ if __name__ == '__main__':
89
+ main()
tools/dataset_converters/pascal_context.py ADDED
@@ -0,0 +1,87 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import os.path as osp
4
+ from functools import partial
5
+
6
+ import numpy as np
7
+ from detail import Detail
8
+ from mmengine.utils import mkdir_or_exist, track_progress
9
+ from PIL import Image
10
+
11
+ _mapping = np.sort(
12
+ np.array([
13
+ 0, 2, 259, 260, 415, 324, 9, 258, 144, 18, 19, 22, 23, 397, 25, 284,
14
+ 158, 159, 416, 33, 162, 420, 454, 295, 296, 427, 44, 45, 46, 308, 59,
15
+ 440, 445, 31, 232, 65, 354, 424, 68, 326, 72, 458, 34, 207, 80, 355,
16
+ 85, 347, 220, 349, 360, 98, 187, 104, 105, 366, 189, 368, 113, 115
17
+ ]))
18
+ _key = np.array(range(len(_mapping))).astype('uint8')
19
+
20
+
21
+ def generate_labels(img_id, detail, out_dir):
22
+
23
+ def _class_to_index(mask, _mapping, _key):
24
+ # assert the values
25
+ values = np.unique(mask)
26
+ for i in range(len(values)):
27
+ assert (values[i] in _mapping)
28
+ index = np.digitize(mask.ravel(), _mapping, right=True)
29
+ return _key[index].reshape(mask.shape)
30
+
31
+ mask = Image.fromarray(
32
+ _class_to_index(detail.getMask(img_id), _mapping=_mapping, _key=_key))
33
+ filename = img_id['file_name']
34
+ mask.save(osp.join(out_dir, filename.replace('jpg', 'png')))
35
+ return osp.splitext(osp.basename(filename))[0]
36
+
37
+
38
+ def parse_args():
39
+ parser = argparse.ArgumentParser(
40
+ description='Convert PASCAL Context annotations to mmsegmentation format')
41
+ parser.add_argument('devkit_path', help='pascal voc devkit path')
42
+ parser.add_argument('json_path', help='annotation json filepath')
43
+ parser.add_argument('-o', '--out_dir', help='output path')
44
+ args = parser.parse_args()
45
+ return args
46
+
47
+
48
+ def main():
49
+ args = parse_args()
50
+ devkit_path = args.devkit_path
51
+ if args.out_dir is None:
52
+ out_dir = osp.join(devkit_path, 'VOC2010', 'SegmentationClassContext')
53
+ else:
54
+ out_dir = args.out_dir
55
+ json_path = args.json_path
56
+ mkdir_or_exist(out_dir)
57
+ img_dir = osp.join(devkit_path, 'VOC2010', 'JPEGImages')
58
+
59
+ train_detail = Detail(json_path, img_dir, 'train')
60
+ train_ids = train_detail.getImgs()
61
+
62
+ val_detail = Detail(json_path, img_dir, 'val')
63
+ val_ids = val_detail.getImgs()
64
+
65
+ mkdir_or_exist(
66
+ osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext'))
67
+
68
+ train_list = track_progress(
69
+ partial(generate_labels, detail=train_detail, out_dir=out_dir),
70
+ train_ids)
71
+ with open(
72
+ osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext',
73
+ 'train.txt'), 'w') as f:
74
+ f.writelines(line + '\n' for line in sorted(train_list))
75
+
76
+ val_list = track_progress(
77
+ partial(generate_labels, detail=val_detail, out_dir=out_dir), val_ids)
78
+ with open(
79
+ osp.join(devkit_path, 'VOC2010/ImageSets/SegmentationContext',
80
+ 'val.txt'), 'w') as f:
81
+ f.writelines(line + '\n' for line in sorted(val_list))
82
+
83
+ print('Done!')
84
+
85
+
86
+ if __name__ == '__main__':
87
+ main()
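_class_to_index works because _mapping is sorted: np.digitize(..., right=True) returns, for every pixel value, the position of that value inside the sorted mapping, and _key converts that position into a contiguous train id. A toy verification with a five-entry subset of the mapping:

import numpy as np

mapping = np.sort(np.array([0, 2, 9, 18, 259]))   # toy subset of _mapping
key = np.arange(len(mapping)).astype('uint8')     # contiguous ids 0..4

mask = np.array([[0, 2], [259, 18]])
index = np.digitize(mask.ravel(), mapping, right=True)
print(key[index].reshape(mask.shape))
# [[0 1]
#  [4 3]]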
tools/dataset_converters/potsdam.py ADDED
@@ -0,0 +1,158 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import glob
4
+ import math
5
+ import os
6
+ import os.path as osp
7
+ import tempfile
8
+ import zipfile
9
+
10
+ import mmcv
11
+ import numpy as np
12
+ from mmengine.utils import ProgressBar, mkdir_or_exist
13
+
14
+
15
+ def parse_args():
16
+ parser = argparse.ArgumentParser(
17
+ description='Convert potsdam dataset to mmsegmentation format')
18
+ parser.add_argument('dataset_path', help='potsdam folder path')
19
+ parser.add_argument('--tmp_dir', help='path of the temporary directory')
20
+ parser.add_argument('-o', '--out_dir', help='output path')
21
+ parser.add_argument(
22
+ '--clip_size',
23
+ type=int,
24
+ help='clipped size of image after preparation',
25
+ default=512)
26
+ parser.add_argument(
27
+ '--stride_size',
28
+ type=int,
29
+ help='stride of clipping original images',
30
+ default=256)
31
+ args = parser.parse_args()
32
+ return args
33
+
34
+
35
+ def clip_big_image(image_path, clip_save_dir, args, to_label=False):
36
+ # Original image of Potsdam dataset is very large, thus pre-processing
37
+ # of them is adopted. Given fixed clip size and stride size to generate
38
+ # clipped image, the intersection of width and height is determined.
39
+ # For example, given one 5120 x 5120 original image, the clip size is
40
+ # 512 and stride size is 256, thus it would generate 20x20 = 400 images
41
+ # whose size are all 512x512.
42
+ image = mmcv.imread(image_path)
43
+
44
+ h, w, c = image.shape
45
+ clip_size = args.clip_size
46
+ stride_size = args.stride_size
47
+
48
+ num_rows = math.ceil((h - clip_size) / stride_size) if math.ceil(
49
+ (h - clip_size) /
50
+ stride_size) * stride_size + clip_size >= h else math.ceil(
51
+ (h - clip_size) / stride_size) + 1
52
+ num_cols = math.ceil((w - clip_size) / stride_size) if math.ceil(
53
+ (w - clip_size) /
54
+ stride_size) * stride_size + clip_size >= w else math.ceil(
55
+ (w - clip_size) / stride_size) + 1
56
+
57
+ x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1))
58
+ xmin = x * clip_size
59
+ ymin = y * clip_size
60
+
61
+ xmin = xmin.ravel()
62
+ ymin = ymin.ravel()
63
+ xmin_offset = np.where(xmin + clip_size > w, w - xmin - clip_size,
64
+ np.zeros_like(xmin))
65
+ ymin_offset = np.where(ymin + clip_size > h, h - ymin - clip_size,
66
+ np.zeros_like(ymin))
67
+ boxes = np.stack([
68
+ xmin + xmin_offset, ymin + ymin_offset,
69
+ np.minimum(xmin + clip_size, w),
70
+ np.minimum(ymin + clip_size, h)
71
+ ],
72
+ axis=1)
73
+
74
+ if to_label:
75
+ color_map = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0],
76
+ [255, 255, 0], [0, 255, 0], [0, 255, 255],
77
+ [0, 0, 255]])
78
+ flatten_v = np.matmul(
79
+ image.reshape(-1, c),
80
+ np.array([2, 3, 4]).reshape(3, 1))
81
+ out = np.zeros_like(flatten_v)
82
+ for idx, class_color in enumerate(color_map):
83
+ value_idx = np.matmul(class_color,
84
+ np.array([2, 3, 4]).reshape(3, 1))
85
+ out[flatten_v == value_idx] = idx
86
+ image = out.reshape(h, w)
87
+
88
+ for box in boxes:
89
+ start_x, start_y, end_x, end_y = box
90
+ clipped_image = image[start_y:end_y,
91
+ start_x:end_x] if to_label else image[
92
+ start_y:end_y, start_x:end_x, :]
93
+ idx_i, idx_j = osp.basename(image_path).split('_')[2:4]
94
+ mmcv.imwrite(
95
+ clipped_image.astype(np.uint8),
96
+ osp.join(
97
+ clip_save_dir,
98
+ f'{idx_i}_{idx_j}_{start_x}_{start_y}_{end_x}_{end_y}.png'))
99
+
100
+
101
+ def main():
102
+ args = parse_args()
103
+ splits = {
104
+ 'train': [
105
+ '2_10', '2_11', '2_12', '3_10', '3_11', '3_12', '4_10', '4_11',
106
+ '4_12', '5_10', '5_11', '5_12', '6_10', '6_11', '6_12', '6_7',
107
+ '6_8', '6_9', '7_10', '7_11', '7_12', '7_7', '7_8', '7_9'
108
+ ],
109
+ 'val': [
110
+ '5_15', '6_15', '6_13', '3_13', '4_14', '6_14', '5_14', '2_13',
111
+ '4_15', '2_14', '5_13', '4_13', '3_14', '7_13'
112
+ ]
113
+ }
114
+
115
+ dataset_path = args.dataset_path
116
+ if args.out_dir is None:
117
+ out_dir = osp.join('data', 'potsdam')
118
+ else:
119
+ out_dir = args.out_dir
120
+
121
+ print('Making directories...')
122
+ mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train'))
123
+ mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val'))
124
+ mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train'))
125
+ mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val'))
126
+
127
+ zipp_list = glob.glob(os.path.join(dataset_path, '*.zip'))
128
+ print('Find the data', zipp_list)
129
+
130
+ for zipp in zipp_list:
131
+ with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
132
+ zip_file = zipfile.ZipFile(zipp)
133
+ zip_file.extractall(tmp_dir)
134
+ src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif'))
135
+ if not len(src_path_list):
136
+ sub_tmp_dir = os.path.join(tmp_dir, os.listdir(tmp_dir)[0])
137
+ src_path_list = glob.glob(os.path.join(sub_tmp_dir, '*.tif'))
138
+
139
+ prog_bar = ProgressBar(len(src_path_list))
140
+ for i, src_path in enumerate(src_path_list):
141
+ idx_i, idx_j = osp.basename(src_path).split('_')[2:4]
142
+ data_type = 'train' if f'{idx_i}_{idx_j}' in splits[
143
+ 'train'] else 'val'
144
+ if 'label' in src_path:
145
+ dst_dir = osp.join(out_dir, 'ann_dir', data_type)
146
+ clip_big_image(src_path, dst_dir, args, to_label=True)
147
+ else:
148
+ dst_dir = osp.join(out_dir, 'img_dir', data_type)
149
+ clip_big_image(src_path, dst_dir, args, to_label=False)
150
+ prog_bar.update()
151
+
152
+ print('Removing the temporary files...')
153
+
154
+ print('Done!')
155
+
156
+
157
+ if __name__ == '__main__':
158
+ main()
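The box construction above keeps every patch fully inside the image: whenever a window would run past the right or bottom border, its start is shifted back so the final patch still has the full clip size. A small sketch of that clamping along one axis, with made-up sizes (a 600-pixel extent and 512-pixel clips):

import numpy as np

w, clip_size = 600, 512
xmin = np.array([0, 512])                      # window starts along one axis
offset = np.where(xmin + clip_size > w,        # the second window overruns by 424
                  w - xmin - clip_size,
                  np.zeros_like(xmin))
starts = xmin + offset
ends = np.minimum(xmin + clip_size, w)
print([(int(a), int(b)) for a, b in zip(starts, ends)])
# [(0, 512), (88, 600)] -> both patches are 512 wide

The LEVIR-CD converter above applies the same clamping when clipping its image pairs.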
tools/dataset_converters/refuge.py ADDED
@@ -0,0 +1,110 @@
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import argparse
3
+ import os
4
+ import os.path as osp
5
+ import tempfile
6
+ import zipfile
7
+
8
+ import mmcv
9
+ import numpy as np
10
+ from mmengine.utils import mkdir_or_exist
11
+
12
+
13
+ def parse_args():
14
+ parser = argparse.ArgumentParser(
15
+ description='Convert REFUGE dataset to mmsegmentation format')
16
+ parser.add_argument('--raw_data_root', help='the root path of raw data')
17
+
18
+ parser.add_argument('--tmp_dir', help='path of the temporary directory')
19
+ parser.add_argument('-o', '--out_dir', help='output path')
20
+ args = parser.parse_args()
21
+ return args
22
+
23
+
24
+ def extract_img(root: str,
25
+ cur_dir: str,
26
+ out_dir: str,
27
+ mode: str = 'train',
28
+ file_type: str = 'img') -> None:
29
+ """_summary_
30
+
31
+ Args:
32
+ Args:
33
+ root (str): root where the extracted data is saved
34
+ cur_dir (cur_dir): dir where the zip_file exists
35
+ out_dir (str): root dir where the data is saved
36
+
37
+ mode (str, optional): Defaults to 'train'.
38
+ file_type (str, optional): Defaults to 'img',else to 'mask'.
39
+ """
40
+ zip_file = zipfile.ZipFile(cur_dir)
41
+ zip_file.extractall(root)
42
+ for cur_dir, dirs, files in os.walk(root):
43
+ # filter child dirs and directories with "Illustration" and "MACOSX"
44
+ if len(dirs) == 0 and \
45
+ cur_dir.split('\\')[-1].find('Illustration') == -1 and \
46
+ cur_dir.find('MACOSX') == -1:
47
+
48
+ file_names = [
49
+ file for file in files
50
+ if file.endswith('.jpg') or file.endswith('.bmp')
51
+ ]
52
+ for filename in sorted(file_names):
53
+ img = mmcv.imread(osp.join(cur_dir, filename))
54
+
55
+ if file_type == 'annotations':
56
+ img = img[:, :, 0]
57
+ img[np.where(img == 0)] = 1
58
+ img[np.where(img == 128)] = 2
59
+ img[np.where(img == 255)] = 0
60
+ mmcv.imwrite(
61
+ img,
62
+ osp.join(out_dir, file_type, mode,
63
+ osp.splitext(filename)[0] + '.png'))
64
+
65
+
66
+ def main():
67
+ args = parse_args()
68
+
69
+ raw_data_root = args.raw_data_root
70
+ if args.out_dir is None:
71
+ out_dir = osp.join('./data', 'REFUGE')
72
+
73
+ else:
74
+ out_dir = args.out_dir
75
+
76
+ print('Making directories...')
77
+ mkdir_or_exist(out_dir)
78
+ mkdir_or_exist(osp.join(out_dir, 'images'))
79
+ mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
80
+ mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
81
+ mkdir_or_exist(osp.join(out_dir, 'images', 'test'))
82
+ mkdir_or_exist(osp.join(out_dir, 'annotations'))
83
+ mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
84
+ mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
85
+ mkdir_or_exist(osp.join(out_dir, 'annotations', 'test'))
86
+
87
+ print('Generating images and annotations...')
88
+ # process data from the child dir on the first rank
89
+ cur_dir, dirs, files = list(os.walk(raw_data_root))[0]
90
+ print('====================')
91
+
92
+ files = list(filter(lambda x: x.endswith('.zip'), files))
93
+
94
+ with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
95
+ for file in files:
96
+ # search data folders for training,validation,test
97
+ mode = list(
98
+ filter(lambda x: file.lower().find(x) != -1,
99
+ ['training', 'test', 'validation']))[0]
100
+ file_root = osp.join(tmp_dir, file[:-4])
101
+ file_type = 'images' if file.find('Anno') == -1 and file.find(
102
+ 'GT') == -1 else 'annotations'
103
+ extract_img(file_root, osp.join(cur_dir, file), out_dir, mode,
104
+ file_type)
105
+
106
+ print('Done!')
107
+
108
+
109
+ if __name__ == '__main__':
110
+ main()
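The annotation branch of extract_img rewrites the original REFUGE grayscale codes (0, 128, 255) into contiguous labels (1, 2, 0). A tiny check of that remapping on a hand-made mask; the rewrite order is safe only because the new values 1 and 2 never collide with the codes still waiting to be rewritten:

import numpy as np

img = np.array([[0, 128], [255, 255]], dtype=np.uint8)   # toy single-channel mask
out = img.copy()
out[out == 0] = 1
out[out == 128] = 2
out[out == 255] = 0
print(out)
# [[1 2]
#  [0 0]]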
tools/dataset_converters/stare.py ADDED
@@ -0,0 +1,167 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ import argparse
+ import gzip
+ import os
+ import os.path as osp
+ import tarfile
+ import tempfile
+
+ import mmcv
+ from mmengine.utils import mkdir_or_exist
+
+ STARE_LEN = 20
+ TRAINING_LEN = 10
+
+
+ def un_gz(src, dst):
+     g_file = gzip.GzipFile(src)
+     with open(dst, 'wb+') as f:
+         f.write(g_file.read())
+     g_file.close()
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(
+         description='Convert STARE dataset to mmsegmentation format')
+     parser.add_argument('image_path', help='the path of stare-images.tar')
+     parser.add_argument('labels_ah', help='the path of labels-ah.tar')
+     parser.add_argument('labels_vk', help='the path of labels-vk.tar')
+     parser.add_argument('--tmp_dir', help='path of the temporary directory')
+     parser.add_argument('-o', '--out_dir', help='output path')
+     args = parser.parse_args()
+     return args
+
+
+ def main():
+     args = parse_args()
+     image_path = args.image_path
+     labels_ah = args.labels_ah
+     labels_vk = args.labels_vk
+     if args.out_dir is None:
+         out_dir = osp.join('data', 'STARE')
+     else:
+         out_dir = args.out_dir
+
+     print('Making directories...')
+     mkdir_or_exist(out_dir)
+     mkdir_or_exist(osp.join(out_dir, 'images'))
+     mkdir_or_exist(osp.join(out_dir, 'images', 'training'))
+     mkdir_or_exist(osp.join(out_dir, 'images', 'validation'))
+     mkdir_or_exist(osp.join(out_dir, 'annotations'))
+     mkdir_or_exist(osp.join(out_dir, 'annotations', 'training'))
+     mkdir_or_exist(osp.join(out_dir, 'annotations', 'validation'))
+
+     with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
+         mkdir_or_exist(osp.join(tmp_dir, 'gz'))
+         mkdir_or_exist(osp.join(tmp_dir, 'files'))
+
+         print('Extracting stare-images.tar...')
+         with tarfile.open(image_path) as f:
+             f.extractall(osp.join(tmp_dir, 'gz'))
+
+         for filename in os.listdir(osp.join(tmp_dir, 'gz')):
+             un_gz(
+                 osp.join(tmp_dir, 'gz', filename),
+                 osp.join(tmp_dir, 'files',
+                          osp.splitext(filename)[0]))
+
+         now_dir = osp.join(tmp_dir, 'files')
+
+         assert len(os.listdir(now_dir)) == STARE_LEN, \
+             f'len(os.listdir(now_dir)) != {STARE_LEN}'
+
+         for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
+             img = mmcv.imread(osp.join(now_dir, filename))
+             mmcv.imwrite(
+                 img,
+                 osp.join(out_dir, 'images', 'training',
+                          osp.splitext(filename)[0] + '.png'))
+
+         for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]:
+             img = mmcv.imread(osp.join(now_dir, filename))
+             mmcv.imwrite(
+                 img,
+                 osp.join(out_dir, 'images', 'validation',
+                          osp.splitext(filename)[0] + '.png'))
+
+         print('Removing the temporary files...')
+
+     with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
+         mkdir_or_exist(osp.join(tmp_dir, 'gz'))
+         mkdir_or_exist(osp.join(tmp_dir, 'files'))
+
+         print('Extracting labels-ah.tar...')
+         with tarfile.open(labels_ah) as f:
+             f.extractall(osp.join(tmp_dir, 'gz'))
+
+         for filename in os.listdir(osp.join(tmp_dir, 'gz')):
+             un_gz(
+                 osp.join(tmp_dir, 'gz', filename),
+                 osp.join(tmp_dir, 'files',
+                          osp.splitext(filename)[0]))
+
+         now_dir = osp.join(tmp_dir, 'files')
+
+         assert len(os.listdir(now_dir)) == STARE_LEN, \
+             f'len(os.listdir(now_dir)) != {STARE_LEN}'
+
+         for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
+             img = mmcv.imread(osp.join(now_dir, filename))
+             # The annotation img should be divided by 128, because some of
+             # the annotation imgs are not standard. We should set a threshold
+             # to convert the nonstandard annotation imgs. The value divided by
+             # 128 is equivalent to '1 if value >= 128 else 0'.
+             mmcv.imwrite(
+                 img[:, :, 0] // 128,
+                 osp.join(out_dir, 'annotations', 'training',
+                          osp.splitext(filename)[0] + '.png'))
+
+         for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]:
+             img = mmcv.imread(osp.join(now_dir, filename))
+             mmcv.imwrite(
+                 img[:, :, 0] // 128,
+                 osp.join(out_dir, 'annotations', 'validation',
+                          osp.splitext(filename)[0] + '.png'))
+
+         print('Removing the temporary files...')
+
+     with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
+         mkdir_or_exist(osp.join(tmp_dir, 'gz'))
+         mkdir_or_exist(osp.join(tmp_dir, 'files'))
+
+         print('Extracting labels-vk.tar...')
+         with tarfile.open(labels_vk) as f:
+             f.extractall(osp.join(tmp_dir, 'gz'))
+
+         for filename in os.listdir(osp.join(tmp_dir, 'gz')):
+             un_gz(
+                 osp.join(tmp_dir, 'gz', filename),
+                 osp.join(tmp_dir, 'files',
+                          osp.splitext(filename)[0]))
+
+         now_dir = osp.join(tmp_dir, 'files')
+
+         assert len(os.listdir(now_dir)) == STARE_LEN, \
+             f'len(os.listdir(now_dir)) != {STARE_LEN}'
+
+         for filename in sorted(os.listdir(now_dir))[:TRAINING_LEN]:
+             img = mmcv.imread(osp.join(now_dir, filename))
+             mmcv.imwrite(
+                 img[:, :, 0] // 128,
+                 osp.join(out_dir, 'annotations', 'training',
+                          osp.splitext(filename)[0] + '.png'))
+
+         for filename in sorted(os.listdir(now_dir))[TRAINING_LEN:]:
+             img = mmcv.imread(osp.join(now_dir, filename))
+             mmcv.imwrite(
+                 img[:, :, 0] // 128,
+                 osp.join(out_dir, 'annotations', 'validation',
+                          osp.splitext(filename)[0] + '.png'))
+
+         print('Removing the temporary files...')
+
+     print('Done!')
+
+
+ if __name__ == '__main__':
+     main()
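The converter binarizes the STARE vessel annotations with integer division by 128, which for 8-bit pixel values is the same as thresholding at 128. A small sketch of that equivalence (the array values are made up for illustration):

    import numpy as np

    ann = np.array([0, 3, 127, 128, 200, 255], dtype=np.uint8)
    by_division = ann // 128
    by_threshold = (ann >= 128).astype(np.uint8)
    assert np.array_equal(by_division, by_threshold)
    print(by_division)  # [0 0 0 1 1 1]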
tools/dataset_converters/synapse.py ADDED
@@ -0,0 +1,155 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ import argparse
+ import os.path as osp
+
+ import nibabel as nib
+ import numpy as np
+ from mmengine.utils import mkdir_or_exist
+ from PIL import Image
+
+
+ def read_files_from_txt(txt_path):
+     with open(txt_path) as f:
+         files = f.readlines()
+     files = [file.strip() for file in files]
+     return files
+
+
+ def read_nii_file(nii_path):
+     img = nib.load(nii_path).get_fdata()
+     return img
+
+
+ def split_3d_image(img):
+     c, _, _ = img.shape
+     res = []
+     for i in range(c):
+         res.append(img[i, :, :])
+     return res
+
+
+ def label_mapping(label):
+     """Label mapping from TransUNet paper setting. It only has 9 classes,
+     which are 'background', 'aorta', 'gallbladder', 'left_kidney',
+     'right_kidney', 'liver', 'pancreas', 'spleen', 'stomach', respectively.
+     Other foreground classes in the original dataset are all set to
+     background.
+
+     More details could be found here: https://arxiv.org/abs/2102.04306
+     """
+     maped_label = np.zeros_like(label)
+     maped_label[label == 8] = 1
+     maped_label[label == 4] = 2
+     maped_label[label == 3] = 3
+     maped_label[label == 2] = 4
+     maped_label[label == 6] = 5
+     maped_label[label == 11] = 6
+     maped_label[label == 1] = 7
+     maped_label[label == 7] = 8
+     return maped_label
+
+
+ def pares_args():
+     parser = argparse.ArgumentParser(
+         description='Convert synapse dataset to mmsegmentation format')
+     parser.add_argument(
+         '--dataset-path', type=str, help='synapse dataset path.')
+     parser.add_argument(
+         '--save-path',
+         default='data/synapse',
+         type=str,
+         help='save path of the dataset.')
+     args = parser.parse_args()
+     return args
+
+
+ def main():
+     args = pares_args()
+     dataset_path = args.dataset_path
+     save_path = args.save_path
+
+     if not osp.exists(dataset_path):
+         raise ValueError('The dataset path does not exist. '
+                          'Please enter a correct dataset path.')
+     if not osp.exists(osp.join(dataset_path, 'img')) \
+             or not osp.exists(osp.join(dataset_path, 'label')):
+         raise FileNotFoundError('The dataset structure is incorrect. '
+                                 'Please check your dataset.')
+
+     train_id = read_files_from_txt(osp.join(dataset_path, 'train.txt'))
+     train_id = [idx[3:7] for idx in train_id]
+
+     test_id = read_files_from_txt(osp.join(dataset_path, 'val.txt'))
+     test_id = [idx[3:7] for idx in test_id]
+
+     mkdir_or_exist(osp.join(save_path, 'img_dir/train'))
+     mkdir_or_exist(osp.join(save_path, 'img_dir/val'))
+     mkdir_or_exist(osp.join(save_path, 'ann_dir/train'))
+     mkdir_or_exist(osp.join(save_path, 'ann_dir/val'))
+
+     # It follows data preparation pipeline from here:
+     # https://github.com/Beckschen/TransUNet/tree/main/datasets
+     for i, idx in enumerate(train_id):
+         img_3d = read_nii_file(
+             osp.join(dataset_path, 'img', 'img' + idx + '.nii.gz'))
+         label_3d = read_nii_file(
+             osp.join(dataset_path, 'label', 'label' + idx + '.nii.gz'))
+
+         img_3d = np.clip(img_3d, -125, 275)
+         img_3d = (img_3d + 125) / 400
+         img_3d *= 255
+         img_3d = np.transpose(img_3d, [2, 0, 1])
+         img_3d = np.flip(img_3d, 2)
+
+         label_3d = np.transpose(label_3d, [2, 0, 1])
+         label_3d = np.flip(label_3d, 2)
+         label_3d = label_mapping(label_3d)
+
+         for c in range(img_3d.shape[0]):
+             img = img_3d[c]
+             label = label_3d[c]
+
+             img = Image.fromarray(img).convert('RGB')
+             label = Image.fromarray(label).convert('L')
+             img.save(
+                 osp.join(
+                     save_path, 'img_dir/train', 'case' + idx.zfill(4) +
+                     '_slice' + str(c).zfill(3) + '.jpg'))
+             label.save(
+                 osp.join(
+                     save_path, 'ann_dir/train', 'case' + idx.zfill(4) +
+                     '_slice' + str(c).zfill(3) + '.png'))
+
+     for i, idx in enumerate(test_id):
+         img_3d = read_nii_file(
+             osp.join(dataset_path, 'img', 'img' + idx + '.nii.gz'))
+         label_3d = read_nii_file(
+             osp.join(dataset_path, 'label', 'label' + idx + '.nii.gz'))
+
+         img_3d = np.clip(img_3d, -125, 275)
+         img_3d = (img_3d + 125) / 400
+         img_3d *= 255
+         img_3d = np.transpose(img_3d, [2, 0, 1])
+         img_3d = np.flip(img_3d, 2)
+
+         label_3d = np.transpose(label_3d, [2, 0, 1])
+         label_3d = np.flip(label_3d, 2)
+         label_3d = label_mapping(label_3d)
+
+         for c in range(img_3d.shape[0]):
+             img = img_3d[c]
+             label = label_3d[c]
+
+             img = Image.fromarray(img).convert('RGB')
+             label = Image.fromarray(label).convert('L')
+             img.save(
+                 osp.join(
+                     save_path, 'img_dir/val', 'case' + idx.zfill(4) +
+                     '_slice' + str(c).zfill(3) + '.jpg'))
+             label.save(
+                 osp.join(
+                     save_path, 'ann_dir/val', 'case' + idx.zfill(4) +
+                     '_slice' + str(c).zfill(3) + '.png'))
+
+
+ if __name__ == '__main__':
+     main()
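The slice export above windows the Synapse CT volumes to the intensity range [-125, 275] and rescales the result to [0, 255], following the TransUNet preprocessing it references. A standalone sketch of that normalization step (the helper name is illustrative):

    import numpy as np

    def window_ct(volume: np.ndarray, low: float = -125, high: float = 275) -> np.ndarray:
        """Clip a CT volume to [low, high] and rescale it to [0, 255]."""
        clipped = np.clip(volume, low, high)
        return (clipped - low) / (high - low) * 255

    # Values below/above the window saturate at 0/255.
    print(window_ct(np.array([-500.0, -125.0, 75.0, 275.0, 1000.0])))
    # [  0.    0.  127.5 255.  255. ]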
tools/dataset_converters/voc_aug.py ADDED
@@ -0,0 +1,92 @@
+ # Copyright (c) OpenMMLab. All rights reserved.
+ import argparse
+ import os.path as osp
+ from functools import partial
+
+ import numpy as np
+ from mmengine.utils import mkdir_or_exist, scandir, track_parallel_progress
+ from PIL import Image
+ from scipy.io import loadmat
+
+ AUG_LEN = 10582
+
+
+ def convert_mat(mat_file, in_dir, out_dir):
+     data = loadmat(osp.join(in_dir, mat_file))
+     mask = data['GTcls'][0]['Segmentation'][0].astype(np.uint8)
+     seg_filename = osp.join(out_dir, mat_file.replace('.mat', '.png'))
+     Image.fromarray(mask).save(seg_filename, 'PNG')
+
+
+ def generate_aug_list(merged_list, excluded_list):
+     return list(set(merged_list) - set(excluded_list))
+
+
+ def parse_args():
+     parser = argparse.ArgumentParser(
+         description='Convert PASCAL VOC annotations to mmsegmentation format')
+     parser.add_argument('devkit_path', help='pascal voc devkit path')
+     parser.add_argument('aug_path', help='pascal voc aug path')
+     parser.add_argument('-o', '--out_dir', help='output path')
+     parser.add_argument(
+         '--nproc', default=1, type=int, help='number of process')
+     args = parser.parse_args()
+     return args
+
+
+ def main():
+     args = parse_args()
+     devkit_path = args.devkit_path
+     aug_path = args.aug_path
+     nproc = args.nproc
+     if args.out_dir is None:
+         out_dir = osp.join(devkit_path, 'VOC2012', 'SegmentationClassAug')
+     else:
+         out_dir = args.out_dir
+     mkdir_or_exist(out_dir)
+     in_dir = osp.join(aug_path, 'dataset', 'cls')
+
+     track_parallel_progress(
+         partial(convert_mat, in_dir=in_dir, out_dir=out_dir),
+         list(scandir(in_dir, suffix='.mat')),
+         nproc=nproc)
+
+     full_aug_list = []
+     with open(osp.join(aug_path, 'dataset', 'train.txt')) as f:
+         full_aug_list += [line.strip() for line in f]
+     with open(osp.join(aug_path, 'dataset', 'val.txt')) as f:
+         full_aug_list += [line.strip() for line in f]
+
+     with open(
+             osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation',
+                      'train.txt')) as f:
+         ori_train_list = [line.strip() for line in f]
+     with open(
+             osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation',
+                      'val.txt')) as f:
+         val_list = [line.strip() for line in f]
+
+     aug_train_list = generate_aug_list(ori_train_list + full_aug_list,
+                                        val_list)
+     assert len(aug_train_list) == AUG_LEN, 'len(aug_train_list) != {}'.format(
+         AUG_LEN)
+
+     with open(
+             osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation',
+                      'trainaug.txt'), 'w') as f:
+         f.writelines(line + '\n' for line in aug_train_list)
+
+     aug_list = generate_aug_list(full_aug_list, ori_train_list + val_list)
+     assert len(aug_list) == AUG_LEN - len(
+         ori_train_list), 'len(aug_list) != {}'.format(AUG_LEN -
+                                                       len(ori_train_list))
+     with open(
+             osp.join(devkit_path, 'VOC2012/ImageSets/Segmentation', 'aug.txt'),
+             'w') as f:
+         f.writelines(line + '\n' for line in aug_list)
+
+     print('Done!')
+
+
+ if __name__ == '__main__':
+     main()
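`generate_aug_list` is a plain set difference, so `trainaug.txt` ends up holding every id from the original train split plus the SBD augmentation splits, minus anything that appears in the VOC val split. A toy illustration of that behavior (the ids below are made up):

    def generate_aug_list(merged_list, excluded_list):
        return list(set(merged_list) - set(excluded_list))

    ori_train = ['2007_0001', '2007_0002']
    full_aug = ['2007_0002', '2007_0003', '2007_0004']
    val = ['2007_0004']

    print(sorted(generate_aug_list(ori_train + full_aug, val)))
    # ['2007_0001', '2007_0002', '2007_0003']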
tools/dataset_tools/create_dataset.py ADDED
@@ -0,0 +1,185 @@
+ import os
+ from glob import glob
+ from typing import List, Literal
+ import shutil
+ from PIL import Image
+ import json
+ import numpy as np
+ from rich.progress import track
+ import cv2
+ from vegseg.datasets import GrassDataset
+ from sklearn.model_selection import train_test_split
+ import argparse
+
+
+ def give_color_to_mask(mask: np.ndarray, palette: List[int]) -> Image.Image:
+     """
+     Convert a mask to a color (palette-mode) image.
+     Args:
+         mask (np.ndarray): numpy array of shape (H, W)
+         palette (List[int]): flat list of RGB values
+     return:
+         color_mask (Image.Image): PIL Image of shape (H, W)
+     """
+     im = Image.fromarray(mask).convert("P")
+     im.putpalette(palette)
+     return im
+
+
+ def get_mask_by_json(filename: str) -> np.ndarray:
+     """
+     Convert a labelme-style json annotation to a mask.
+     Args:
+         filename (str): path to json file
+     return:
+         mask (np.ndarray): numpy array of shape (H, W)
+     """
+     json_file = json.load(open(filename))
+     img_height = json_file["imageHeight"]
+     img_width = json_file["imageWidth"]
+     # uint8 keeps the mask compatible with both cv2.fillPoly and PIL.
+     mask = np.zeros((img_height, img_width), dtype="uint8")
+     for shape in json_file["shapes"]:
+         label = int(shape["label"])
+         label -= 1
+         label = max(label, 0)
+         points = np.array(shape["points"]).astype(np.int32)
+         cv2.fillPoly(mask, [points], label)
+     return mask
+
+
+ def json_to_image(json_path, image_path):
+     """
+     Convert a json annotation to a color mask image.
+     Args:
+         json_path (str): path to json file
+         image_path (str): path to save image
+     return: None
+     """
+     mask = get_mask_by_json(json_path)
+     palette_list = GrassDataset.METAINFO["palette"]
+     palette = []
+     for palette_item in palette_list:
+         palette.extend(palette_item)
+     color_mask = give_color_to_mask(mask, palette)
+     color_mask.save(image_path)
+
+
+ def create_dataset(
+     image_paths: List[str],
+     ann_paths: List[str],
+     phase: Literal["train", "val"],
+     output_dir: str,
+ ):
+     """
+     Args:
+         image_paths (List[str]): list of image paths
+         ann_paths (List[str]): list of annotation paths
+         phase (Literal["train", "val"]): train or val
+         output_dir (str): path to save dataset
+     Return:
+         None
+     """
+     for image_path, ann_path in track(
+         zip(image_paths, ann_paths),
+         description=f"{phase} dataset",
+         total=len(image_paths),
+     ):
+         ann_save_path = os.path.join(
+             output_dir,
+             "ann_dir",
+             phase,
+             os.path.basename(ann_path).replace(".json", ".png"),
+         )
+
+         # Copy the image to the target directory.
+         new_image_path = os.path.join(
+             output_dir, "img_dir", phase, os.path.basename(image_path)
+         )
+         shutil.copy(image_path, new_image_path)
+
+         # Save the annotation to the target directory.
+         json_to_image(ann_path, ann_save_path)
+
+
+ def split_dataset(
+     root_path: str,
+     output_path: str,
+     split_ratio: float = 0.8,
+     shuffle: bool = True,
+     seed: int = 42,
+ ) -> None:
+     """
+     Split a dataset into train and validation sets.
+
+     Args:
+         root_path (str): Path to the dataset. The dataset should be organized as follows:
+             dataset_path/
+                 image1.tif
+                 image1.json
+                 ...
+                 imageN.tif
+                 imageN.json
+         output_path (str): Path to the output directory where the split dataset will be saved.
+         split_ratio (float, optional): Ratio of the dataset to be used for training. Defaults to 0.8.
+         shuffle (bool, optional): Whether to shuffle before splitting. Defaults to True.
+         seed (int, optional): Seed for the random number generator. Defaults to 42.
+     """
+     image_paths = glob(os.path.join(root_path, "*.tif"))
+     ann_paths = [filename.replace(".tif", ".json") for filename in image_paths]
+     assert len(image_paths) == len(
+         ann_paths
+     ), "Number of images and annotations do not match"
+     print(f"images: {len(image_paths)}, annotations: {len(ann_paths)}")
+
+     image_train, image_test, ann_train, ann_test = train_test_split(
+         image_paths,
+         ann_paths,
+         train_size=split_ratio,
+         random_state=seed,
+         shuffle=shuffle,
+     )
+     print(f"train: {len(image_train)}, test: {len(image_test)}")
+
+     os.makedirs(os.path.join(output_path, "img_dir", "train"), exist_ok=True)
+     os.makedirs(os.path.join(output_path, "img_dir", "val"), exist_ok=True)
+     os.makedirs(os.path.join(output_path, "ann_dir", "train"), exist_ok=True)
+     os.makedirs(os.path.join(output_path, "ann_dir", "val"), exist_ok=True)
+
+     create_dataset(image_train, ann_train, "train", output_path)
+     create_dataset(image_test, ann_test, "val", output_path)
+
+
+ def main():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--root", type=str, default="data/raw_data")
+     parser.add_argument("--output", type=str, default="data/grass")
+     parser.add_argument("--split_ratio", type=float, default=0.8)
+     parser.add_argument("--seed", type=int, default=42)
+     # argparse does not convert "True"/"False" strings with type=bool, so
+     # parse the flag explicitly to make "--shuffle False" actually work.
+     parser.add_argument(
+         "--shuffle",
+         type=lambda x: str(x).lower() in ("true", "1", "yes"),
+         default=True,
+     )
+     args = parser.parse_args()
+
+     root: str = args.root
+     output_path: str = args.output
+     split_ratio: float = args.split_ratio
+     seed: int = args.seed
+     shuffle: bool = args.shuffle
+
+     split_dataset(
+         root_path=root,
+         output_path=output_path,
+         split_ratio=split_ratio,
+         shuffle=shuffle,
+         seed=seed,
+     )
+
+     print("Dataset split finished")
+
+
+ if __name__ == "__main__":
+     # Usage example: python src/tools/split_dataset.py --root data/raw_data --output data/grass --split_ratio 0.8 --seed 42 --shuffle True
+     main()
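`get_mask_by_json` rasterizes labelme-style polygons into a class-index mask, shifting the 1-based labels stored in the json down to 0-based indices before filling each polygon. A minimal sketch of the same step with an in-memory annotation dict (the dict and its values are made up for illustration):

    import numpy as np
    import cv2

    # Hypothetical labelme-style annotation: one triangle of class "2".
    annotation = {
        "imageHeight": 8,
        "imageWidth": 8,
        "shapes": [
            {"label": "2", "points": [[1, 1], [6, 1], [1, 6]]},
        ],
    }

    mask = np.zeros((annotation["imageHeight"], annotation["imageWidth"]), dtype="uint8")
    for shape in annotation["shapes"]:
        index = max(int(shape["label"]) - 1, 0)  # 1-based label -> 0-based index
        points = np.array(shape["points"], dtype=np.int32)
        cv2.fillPoly(mask, [points], index)

    print(np.unique(mask))  # [0 1]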