diff --git a/configs/_base_/datasets/grass.py b/configs/_base_/datasets/grass.py new file mode 100644 index 0000000000000000000000000000000000000000..ef7686d39edea237a4eb6795079f2e2683112f51 --- /dev/null +++ b/configs/_base_/datasets/grass.py @@ -0,0 +1,50 @@ +dataset_type = 'GrassDataset' +data_root = 'data/grass' + +crop_size = (256, 256) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='RandomCrop', crop_size=crop_size), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=crop_size), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] + +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='img_dir/train', + seg_map_path='ann_dir/train'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='img_dir/val', + seg_map_path='ann_dir/val'), + pipeline=test_pipeline)) + +test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=["mIoU", "mDice", "mFscore"],) +test_evaluator = val_evaluator \ No newline at end of file diff --git a/configs/_base_/datasets/l8_biome.py b/configs/_base_/datasets/l8_biome.py new file mode 100644 index 0000000000000000000000000000000000000000..7b02e9a7d10b63d4fa8deb17dfebc25325675d91 --- /dev/null +++ b/configs/_base_/datasets/l8_biome.py @@ -0,0 +1,62 @@ +dataset_type = 'L8BIOMEDataset' +data_root = 'data/l8_biome' + +crop_size = (512, 512) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='RandomCrop', crop_size=crop_size), + dict(type='RandomFlip', prob=0.5), + dict(type='PhotoMetricDistortion'), + dict(type='PackSegInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='Resize', scale=crop_size), + # add loading annotation after ``Resize`` because ground truth + # does not need to do resize data transform + dict(type='LoadAnnotations'), + dict(type='PackSegInputs') +] + +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='InfiniteSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='img_dir/train', + seg_map_path='ann_dir/train'), + pipeline=train_pipeline)) +val_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='img_dir/test', + seg_map_path='ann_dir/test'), + pipeline=test_pipeline)) + +test_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict( + img_path='img_dir/test', + seg_map_path='ann_dir/test'), + pipeline=test_pipeline)) +# test_dataloader = val_dataloader + +val_evaluator = dict(type='IoUMetric', iou_metrics=["mIoU", "mDice", 
"mFscore"],) +test_evaluator = val_evaluator \ No newline at end of file diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py new file mode 100644 index 0000000000000000000000000000000000000000..272b4d2467992b0f584a3b9d825061c0db474842 --- /dev/null +++ b/configs/_base_/default_runtime.py @@ -0,0 +1,15 @@ +default_scope = 'mmseg' +env_cfg = dict( + cudnn_benchmark=True, + mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), + dist_cfg=dict(backend='nccl'), +) +vis_backends = [dict(type='LocalVisBackend')] +visualizer = dict( + type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer') +log_processor = dict(by_epoch=False) +log_level = 'INFO' +load_from = None +resume = False + +tta_model = dict(type='SegTTAModel') diff --git a/configs/_base_/models/convnext_upernet.py b/configs/_base_/models/convnext_upernet.py new file mode 100644 index 0000000000000000000000000000000000000000..16cf762901ccf807b50fe2da049be172c9342180 --- /dev/null +++ b/configs/_base_/models/convnext_upernet.py @@ -0,0 +1,52 @@ +norm_cfg = dict(type='SyncBN', requires_grad=True) +custom_imports = dict(imports='mmpretrain.models', allow_failed_imports=False) +checkpoint_file = 'checkpoints/convnext-base.pth' # noqa +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='mmpretrain.ConvNeXt', + arch='base', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), + decode_head=dict( + type='UPerHead', + in_channels=[128, 256, 512, 1024], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=512, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) \ No newline at end of file diff --git a/configs/_base_/models/convnextv2_femto_vit_segformer_vegseg.py b/configs/_base_/models/convnextv2_femto_vit_segformer_vegseg.py new file mode 100644 index 0000000000000000000000000000000000000000..c2627807b92ba2ca7f44f47147da4e9f5dd991f2 --- /dev/null +++ b/configs/_base_/models/convnextv2_femto_vit_segformer_vegseg.py @@ -0,0 +1,78 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +data_preprocessor = dict( + type="SegDataPreProcessor", + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, +) + +model = dict( + type="DistillEncoderDecoder", + data_preprocessor=data_preprocessor, + pretrained=None, + teach_backbone=dict( + type="mmpretrain.VisionTransformer", + arch="base", + frozen_stages=12, + img_size=256, + patch_size=14, + layer_scale_init_value=1e-5, + out_indices=(2, 5, 8, 11), + out_type="featmap", + init_cfg=dict( + type="Pretrained", + checkpoint="checkpoints/dinov2-base.pth", + prefix="backbone", + ), + ), + 
backbone=dict( + type="mmpretrain.ConvNeXt", + arch='femto', + out_indices=[0, 1, 2, 3], + drop_path_rate=0.1, + layer_scale_init_value=0., + gap_before_final_norm=False, + use_grn=True, + init_cfg=dict( + type="Pretrained", + checkpoint="checkpoints/convnextv2_femote.pth", + prefix="backbone", + ), + ), + fam=dict( + type="FAM", + in_channels=[48, 96, 192, 384], + out_channels=768, + output_size=19, + ), + decode_head=dict( + type="SegformerHead", + in_channels=[768, 768, 768, 768], + in_index=[0, 1, 2, 3], + channels=256, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=768, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/configs/_base_/models/deeplabv3_unet_s5-d16.py b/configs/_base_/models/deeplabv3_unet_s5-d16.py new file mode 100644 index 0000000000000000000000000000000000000000..91a29b8ba07a76ef0141422f1050a2c9b86947a7 --- /dev/null +++ b/configs/_base_/models/deeplabv3_unet_s5-d16.py @@ -0,0 +1,58 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='UNet', + in_channels=3, + base_channels=64, + num_stages=5, + strides=(1, 1, 1, 1, 1), + enc_num_convs=(2, 2, 2, 2, 2), + dec_num_convs=(2, 2, 2, 2), + downsamples=(True, True, True, True), + enc_dilations=(1, 1, 1, 1, 1), + dec_dilations=(1, 1, 1, 1), + with_cp=False, + conv_cfg=None, + norm_cfg=norm_cfg, + act_cfg=dict(type='ReLU'), + upsample_cfg=dict(type='InterpConv'), + norm_eval=False), + decode_head=dict( + type='ASPPHead', + in_channels=64, + in_index=4, + channels=16, + dilations=(1, 12, 24, 36), + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=128, + in_index=3, + channels=64, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=2, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) \ No newline at end of file diff --git a/configs/_base_/models/deeplabv3plus_r50-d8.py b/configs/_base_/models/deeplabv3plus_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..dc0ae0d70210d9b57456896439ef611e8bc52553 --- /dev/null +++ b/configs/_base_/models/deeplabv3plus_r50-d8.py @@ -0,0 +1,54 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + 
depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='DepthwiseSeparableASPPHead', + in_channels=2048, + in_index=3, + channels=512, + dilations=(1, 12, 24, 36), + c1_in_channels=256, + c1_channels=48, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) \ No newline at end of file diff --git a/configs/_base_/models/dinov2_upernet.py b/configs/_base_/models/dinov2_upernet.py new file mode 100644 index 0000000000000000000000000000000000000000..cf006823f251939c5411546e6ffff0ae4ec3fd9d --- /dev/null +++ b/configs/_base_/models/dinov2_upernet.py @@ -0,0 +1,64 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +data_preprocessor = dict( + type="SegDataPreProcessor", + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, +) +model = dict( + type="EncoderDecoder", + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type="mmpretrain.VisionTransformer", + arch="base", + frozen_stages=12, + img_size=256, + patch_size=14, + layer_scale_init_value=1e-5, + out_indices=(2, 5, 8, 11), + out_type = 'featmap', + init_cfg=dict( + type="Pretrained", + checkpoint="checkpoints/dinov2-base.pth", + prefix="backbone", + ), + ), + neck=dict( + type="MultiLevelNeck", + in_channels=[768, 768, 768, 768], + out_channels=768, + scales=[4, 2, 1, 0.5], + ), + decode_head=dict( + type="UPerHead", + in_channels=[768, 768, 768, 768], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=768, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/configs/_base_/models/fcn_r50-d8.py b/configs/_base_/models/fcn_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..08e3d7a86e3720136d2e6b646f9aa19876624417 --- /dev/null +++ b/configs/_base_/models/fcn_r50-d8.py @@ -0,0 +1,53 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + 
style='pytorch', + contract_dilation=True), + decode_head=dict( + type='FCNHead', + in_channels=2048, + in_index=3, + channels=512, + num_convs=2, + concat_input=True, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) \ No newline at end of file diff --git a/configs/_base_/models/ktda.py b/configs/_base_/models/ktda.py new file mode 100644 index 0000000000000000000000000000000000000000..bac674b7a4e726e50b8c922611f0fc4823740fa3 --- /dev/null +++ b/configs/_base_/models/ktda.py @@ -0,0 +1,84 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +data_preprocessor = dict( + type="SegDataPreProcessor", + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, +) + +model = dict( + type="DistillEncoderDecoder", + data_preprocessor=data_preprocessor, + pretrained=None, + teach_backbone=dict( + type="mmpretrain.VisionTransformer", + arch="base", + frozen_stages=12, + img_size=256, + patch_size=14, + layer_scale_init_value=1e-5, + out_indices=(2, 5, 8, 11), + out_type="featmap", + init_cfg=dict( + type="Pretrained", + checkpoint="checkpoints/dinov2-base.pth", + prefix="backbone", + ), + ), + backbone=dict( + type="mmpretrain.ConvNeXt", + arch="base", + out_indices=[0, 1, 2, 3], + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type="Pretrained", + checkpoint="checkpoints/convnext-base.pth", + prefix="backbone.", + ), + ), + fam=dict( + type="FAM", + in_channels=[128, 256, 512, 1024], + out_channels=768, + output_size=19, + ), + neck=dict( + type="MultiLevelNeck", + in_channels=[768, 768, 768, 768], + out_channels=768, + scales=[4, 2, 1, 0.5], + ), + decode_head=dict( + type="UPerHead", + in_channels=[768, 768, 768, 768], + in_index=[0, 1, 2, 3], + pool_scales=(1, 2, 3, 6), + channels=512, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=768, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/configs/_base_/models/mask2former_r50_8xb2.py b/configs/_base_/models/mask2former_r50_8xb2.py new file mode 100644 index 0000000000000000000000000000000000000000..428d7ffc0d1546cca4b4a05f80e32d422ae10ef6 --- /dev/null +++ b/configs/_base_/models/mask2former_r50_8xb2.py @@ -0,0 +1,131 @@ +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, + size=(256,256), + test_cfg=dict(size_divisor=32)) +num_classes = 5 +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + backbone=dict( + 
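+        # ResNet-50 initialized from torchvision ImageNet weights; SyncBN affine params kept fixed (requires_grad=False)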
type='ResNet', + depth=50, + deep_stem=False, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='SyncBN', requires_grad=False), + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + decode_head=dict( + type='Mask2FormerHead', + in_channels=[256, 512, 1024, 2048], + strides=[4, 8, 16, 32], + feat_channels=256, + out_channels=256, + num_classes=num_classes, + num_queries=100, + num_transformer_feat_level=3, + align_corners=False, + pixel_decoder=dict( + type='mmdet.MSDeformAttnPixelDecoder', + num_outs=3, + norm_cfg=dict(type='GN', num_groups=32), + act_cfg=dict(type='ReLU'), + encoder=dict( # DeformableDetrTransformerEncoder + num_layers=6, + layer_cfg=dict( # DeformableDetrTransformerEncoderLayer + self_attn_cfg=dict( # MultiScaleDeformableAttention + embed_dims=256, + num_heads=8, + num_levels=3, + num_points=4, + im2col_step=64, + dropout=0.0, + batch_first=True, + norm_cfg=None, + init_cfg=None), + ffn_cfg=dict( + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + ffn_drop=0.0, + act_cfg=dict(type='ReLU', inplace=True))), + init_cfg=None), + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + init_cfg=None), + enforce_decoder_input_project=False, + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True), + transformer_decoder=dict( # Mask2FormerTransformerDecoder + return_intermediate=True, + num_layers=9, + layer_cfg=dict( # Mask2FormerTransformerDecoderLayer + self_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True), + cross_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True), + ffn_cfg=dict( + embed_dims=256, + feedforward_channels=2048, + num_fcs=2, + act_cfg=dict(type='ReLU', inplace=True), + ffn_drop=0.0, + dropout_layer=None, + add_identity=True)), + init_cfg=None), + loss_cls=dict( + type='mmdet.CrossEntropyLoss', + use_sigmoid=False, + loss_weight=2.0, + reduction='mean', + class_weight=[1.0] * num_classes + [0.1]), + loss_mask=dict( + type='mmdet.CrossEntropyLoss', + use_sigmoid=True, + reduction='mean', + loss_weight=5.0), + loss_dice=dict( + type='mmdet.DiceLoss', + use_sigmoid=True, + activate=True, + reduction='mean', + naive_dice=True, + eps=1.0, + loss_weight=5.0), + train_cfg=dict( + num_points=12544, + oversample_ratio=3.0, + importance_sample_ratio=0.75, + assigner=dict( + type='mmdet.HungarianAssigner', + match_costs=[ + dict(type='mmdet.ClassificationCost', weight=2.0), + dict( + type='mmdet.CrossEntropyLossCost', + weight=5.0, + use_sigmoid=True), + dict( + type='mmdet.DiceCost', + weight=5.0, + pred_act=True, + eps=1.0) + ]), + sampler=dict(type='mmdet.MaskPseudoSampler'))), + train_cfg=dict(), + test_cfg=dict(mode='whole')) diff --git a/configs/_base_/models/mask2former_swin-b.py b/configs/_base_/models/mask2former_swin-b.py new file mode 100644 index 0000000000000000000000000000000000000000..aab0f02c4977da4e93bf3630641a4c136dbfc3c0 --- /dev/null +++ b/configs/_base_/models/mask2former_swin-b.py @@ -0,0 +1,158 @@ +pretrained = "https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth" # noqa + +data_preprocessor = dict( + type="SegDataPreProcessor", + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, + size=(256, 
256), +) +num_classes = 150 + +depths = [2, 2, 18, 2] +model = dict( + type="EncoderDecoder", + data_preprocessor=data_preprocessor, + backbone=dict( + type="SwinTransformer", + pretrain_img_size=384, + embed_dims=128, + depths=depths, + num_heads=[4, 8, 16, 32], + window_size=12, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.3, + patch_norm=True, + out_indices=(0, 1, 2, 3), + with_cp=False, + frozen_stages=-1, + init_cfg=dict(type="Pretrained", checkpoint=pretrained), + ), + decode_head=dict( + type="Mask2FormerHead", + in_channels=[128, 256, 512, 1024], + strides=[4, 8, 16, 32], + feat_channels=256, + out_channels=256, + num_classes=num_classes, + num_queries=100, + num_transformer_feat_level=3, + align_corners=False, + pixel_decoder=dict( + type="mmdet.MSDeformAttnPixelDecoder", + num_outs=3, + norm_cfg=dict(type="GN", num_groups=32), + act_cfg=dict(type="ReLU"), + encoder=dict( # DeformableDetrTransformerEncoder + num_layers=6, + layer_cfg=dict( # DeformableDetrTransformerEncoderLayer + self_attn_cfg=dict( # MultiScaleDeformableAttention + embed_dims=256, + num_heads=8, + num_levels=3, + num_points=4, + im2col_step=64, + dropout=0.0, + batch_first=True, + norm_cfg=None, + init_cfg=None, + ), + ffn_cfg=dict( + embed_dims=256, + feedforward_channels=1024, + num_fcs=2, + ffn_drop=0.0, + act_cfg=dict(type="ReLU", inplace=True), + ), + ), + init_cfg=None, + ), + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True + ), + init_cfg=None, + ), + enforce_decoder_input_project=False, + positional_encoding=dict( # SinePositionalEncoding + num_feats=128, normalize=True + ), + transformer_decoder=dict( # Mask2FormerTransformerDecoder + return_intermediate=True, + num_layers=9, + layer_cfg=dict( # Mask2FormerTransformerDecoderLayer + self_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True, + ), + cross_attn_cfg=dict( # MultiheadAttention + embed_dims=256, + num_heads=8, + attn_drop=0.0, + proj_drop=0.0, + dropout_layer=None, + batch_first=True, + ), + ffn_cfg=dict( + embed_dims=256, + feedforward_channels=2048, + num_fcs=2, + act_cfg=dict(type="ReLU", inplace=True), + ffn_drop=0.0, + dropout_layer=None, + add_identity=True, + ), + ), + init_cfg=None, + ), + loss_cls=dict( + type="mmdet.CrossEntropyLoss", + use_sigmoid=False, + loss_weight=2.0, + reduction="mean", + class_weight=[1.0] * num_classes + [0.1], + ), + loss_mask=dict( + type="mmdet.CrossEntropyLoss", + use_sigmoid=True, + reduction="mean", + loss_weight=5.0, + ), + loss_dice=dict( + type="mmdet.DiceLoss", + use_sigmoid=True, + activate=True, + reduction="mean", + naive_dice=True, + eps=1.0, + loss_weight=5.0, + ), + train_cfg=dict( + num_points=12544, + oversample_ratio=3.0, + importance_sample_ratio=0.75, + assigner=dict( + type="mmdet.HungarianAssigner", + match_costs=[ + dict(type="mmdet.ClassificationCost", weight=2.0), + dict( + type="mmdet.CrossEntropyLossCost", weight=5.0, use_sigmoid=True + ), + dict(type="mmdet.DiceCost", weight=5.0, pred_act=True, eps=1.0), + ], + ), + sampler=dict(type="mmdet.MaskPseudoSampler"), + ), + ), + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/configs/_base_/models/pspnet_r50-d8.py b/configs/_base_/models/pspnet_r50-d8.py new file mode 100644 index 0000000000000000000000000000000000000000..b4c19b638d081f2f0479e56bcf36512ab6e4df59 --- /dev/null +++ b/configs/_base_/models/pspnet_r50-d8.py @@ -0,0 +1,52 
@@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet50_v1c', + backbone=dict( + type='ResNetV1c', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + dilations=(1, 1, 2, 4), + strides=(1, 2, 1, 1), + norm_cfg=norm_cfg, + norm_eval=False, + style='pytorch', + contract_dilation=True), + decode_head=dict( + type='PSPHead', + in_channels=2048, + in_index=3, + channels=512, + pool_scales=(1, 2, 3, 6), + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + auxiliary_head=dict( + type='FCNHead', + in_channels=1024, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) \ No newline at end of file diff --git a/configs/_base_/models/segformer_mit-b0.py b/configs/_base_/models/segformer_mit-b0.py new file mode 100644 index 0000000000000000000000000000000000000000..f44efa4e10aaf950a779ff11b131bdfff184d85e --- /dev/null +++ b/configs/_base_/models/segformer_mit-b0.py @@ -0,0 +1,42 @@ +# model settings +norm_cfg = dict(type='SyncBN', requires_grad=True) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255) +model = dict( + type='EncoderDecoder', + data_preprocessor=data_preprocessor, + pretrained=None, + backbone=dict( + type='MixVisionTransformer', + in_channels=3, + embed_dims=32, + num_stages=4, + num_layers=[2, 2, 2, 2], + num_heads=[1, 2, 5, 8], + patch_sizes=[7, 3, 3, 3], + sr_ratios=[8, 4, 2, 1], + out_indices=(0, 1, 2, 3), + mlp_ratio=4, + qkv_bias=True, + drop_rate=0.0, + attn_drop_rate=0.0, + drop_path_rate=0.1), + decode_head=dict( + type='SegformerHead', + in_channels=[32, 64, 160, 256], + in_index=[0, 1, 2, 3], + channels=256, + dropout_ratio=0.1, + num_classes=19, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole')) \ No newline at end of file diff --git a/configs/_base_/models/tiny_vit_segformer_vegseg.py b/configs/_base_/models/tiny_vit_segformer_vegseg.py new file mode 100644 index 0000000000000000000000000000000000000000..377fb7b0ba7d9a2f93d4fa41142b0308e63eb624 --- /dev/null +++ b/configs/_base_/models/tiny_vit_segformer_vegseg.py @@ -0,0 +1,78 @@ +# model settings +norm_cfg = dict(type="SyncBN", requires_grad=True) +data_preprocessor = dict( + type="SegDataPreProcessor", + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, +) + +model = dict( + type="DistillEncoderDecoder", + data_preprocessor=data_preprocessor, + pretrained=None, + teach_backbone=dict( + type="mmpretrain.VisionTransformer", + arch="base", + frozen_stages=12, + img_size=256, + patch_size=14, + layer_scale_init_value=1e-5, + out_indices=(2, 5, 8, 11), + out_type="featmap", + init_cfg=dict( 
+ type="Pretrained", + checkpoint="checkpoints/dinov2-base.pth", + prefix="backbone", + ), + ), + backbone=dict( + type="mmpretrain.TinyViT", + arch="5m", + img_size=(256, 256), + window_size=[7, 7, 14, 7], + out_indices=(0, 1, 2, 3), + drop_path_rate=0.0, + gap_before_final_norm=False, + init_cfg=dict( + type="Pretrained", + checkpoint="checkpoints/tiny_vit_5m_imagenet.pth", + prefix="backbone", + ), + ), + fam=dict( + type="FAM", + in_channels=[128, 160, 320, 320], + out_channels=768, + output_size=19, + ), + decode_head=dict( + type="SegformerHead", + in_channels=[768, 768, 768, 768], + in_index=[0, 1, 2, 3], + channels=256, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ), + auxiliary_head=dict( + type="FCNHead", + in_channels=768, + in_index=2, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=norm_cfg, + align_corners=False, + loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4), + ), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode="whole"), +) diff --git a/configs/_base_/schedules/grass_schedule.py b/configs/_base_/schedules/grass_schedule.py new file mode 100644 index 0000000000000000000000000000000000000000..07df256c2ed75e38c152bafc314db1432de89b4e --- /dev/null +++ b/configs/_base_/schedules/grass_schedule.py @@ -0,0 +1,43 @@ +# optimizer +optim_wrapper = dict( + type="OptimWrapper", + optimizer=dict(type="AdamW", lr=0.0006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + "absolute_pos_embed": dict(decay_mult=0.0), + "relative_position_bias_table": dict(decay_mult=0.0), + "norm": dict(decay_mult=0.0), + } + ), +) +# learning policy +param_scheduler = [ + dict(type="LinearLR", start_factor=1e-3, by_epoch=False, begin=0, end=230*5), + dict( + type="PolyLR", + eta_min=0.0, + power=0.9, + begin=230*5, + end=23000, + by_epoch=False, + ), +] +# training schedule for 40k +train_cfg = dict(type="IterBasedTrainLoop", max_iters=23000, val_interval=230) +val_cfg = dict(type="ValLoop") +test_cfg = dict(type="TestLoop") +default_hooks = dict( + timer=dict(type="IterTimerHook"), + logger=dict(type="LoggerHook", interval=230, log_metric_by_epoch=False), + param_scheduler=dict(type="ParamSchedulerHook"), + checkpoint=dict( + type="CheckpointHook", + by_epoch=False, + interval=230, + save_best=["mIoU"], + rule=["greater"], + max_keep_ckpts=1, + ), + sampler_seed=dict(type="DistSamplerSeedHook"), + visualization=dict(type="SegVisualizationHook"), +) diff --git a/configs/_base_/schedules/other_dataset_scedule.py b/configs/_base_/schedules/other_dataset_scedule.py new file mode 100644 index 0000000000000000000000000000000000000000..1b7d6ef80c2951dad56df7e27e71997826d209e9 --- /dev/null +++ b/configs/_base_/schedules/other_dataset_scedule.py @@ -0,0 +1,43 @@ +# optimizer +optim_wrapper = dict( + type="OptimWrapper", + optimizer=dict(type="AdamW", lr=0.0006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + "absolute_pos_embed": dict(decay_mult=0.0), + "relative_position_bias_table": dict(decay_mult=0.0), + "norm": dict(decay_mult=0.0), + } + ), +) +# learning policy +param_scheduler = [ + dict(type="LinearLR", start_factor=1e-3, by_epoch=False, begin=0, end=2000), + dict( + type="PolyLR", + eta_min=0.0, + power=0.9, + begin=1000, + end=80000, + by_epoch=False, + ), +] +# training schedule for 40k +train_cfg = 
dict(type="IterBasedTrainLoop", max_iters=80000, val_interval=1000) +val_cfg = dict(type="ValLoop") +test_cfg = dict(type="TestLoop") +default_hooks = dict( + timer=dict(type="IterTimerHook"), + logger=dict(type="LoggerHook", interval=1000, log_metric_by_epoch=False), + param_scheduler=dict(type="ParamSchedulerHook"), + checkpoint=dict( + type="CheckpointHook", + by_epoch=False, + interval=1000, + save_best=["mIoU"], + rule=["greater"], + max_keep_ckpts=1, + ), + sampler_seed=dict(type="DistSamplerSeedHook"), + visualization=dict(type="SegVisualizationHook"), +) diff --git a/configs/_base_/schedules/schedule_160k.py b/configs/_base_/schedules/schedule_160k.py new file mode 100644 index 0000000000000000000000000000000000000000..60d7bec76244e86ec4635173a45985d4f7023e74 --- /dev/null +++ b/configs/_base_/schedules/schedule_160k.py @@ -0,0 +1,25 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=0, + end=160000, + by_epoch=False) +] +# training schedule for 160k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=160000, val_interval=16000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/schedule_20k.py b/configs/_base_/schedules/schedule_20k.py new file mode 100644 index 0000000000000000000000000000000000000000..e809e3e88092446b8ee233ebbc6feccabdbccaac --- /dev/null +++ b/configs/_base_/schedules/schedule_20k.py @@ -0,0 +1,24 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=0, + end=20000, + by_epoch=False) +] +# training schedule for 20k +train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/schedule_240k.py b/configs/_base_/schedules/schedule_240k.py new file mode 100644 index 0000000000000000000000000000000000000000..feb2ce9637fd539a28881d1ddf516e5bc3e58be5 --- /dev/null +++ b/configs/_base_/schedules/schedule_240k.py @@ -0,0 +1,25 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=0, + end=240000, + by_epoch=False) +] +# training schedule for 240k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=240000, val_interval=24000) +val_cfg = 
dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=24000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/schedule_25k.py b/configs/_base_/schedules/schedule_25k.py new file mode 100644 index 0000000000000000000000000000000000000000..825e141ed12cd36567030de94c6bd081718510ee --- /dev/null +++ b/configs/_base_/schedules/schedule_25k.py @@ -0,0 +1,28 @@ +# optimizer +optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.1) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='LinearLR', start_factor=3e-2, begin=0, end=12000, + by_epoch=False), + dict( + type='PolyLRRatio', + eta_min_ratio=3e-2, + power=0.9, + begin=12000, + end=24000, + by_epoch=False), + dict(type='ConstantLR', by_epoch=False, factor=1, begin=24000, end=25000) +] +# training schedule for 25k +train_cfg = dict(type='IterBasedTrainLoop', max_iters=25000, val_interval=1000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/schedule_320k.py b/configs/_base_/schedules/schedule_320k.py new file mode 100644 index 0000000000000000000000000000000000000000..70b063afc9f89d62cb2f2dce8a6f225ad4d39220 --- /dev/null +++ b/configs/_base_/schedules/schedule_320k.py @@ -0,0 +1,25 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=0, + end=320000, + by_epoch=False) +] +# training schedule for 320k +train_cfg = dict( + type='IterBasedTrainLoop', max_iters=320000, val_interval=32000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/schedule_40k.py b/configs/_base_/schedules/schedule_40k.py new file mode 100644 index 0000000000000000000000000000000000000000..4b823339a28cfc19159e4e93603fbf1beb81ac94 --- /dev/null +++ b/configs/_base_/schedules/schedule_40k.py @@ -0,0 +1,24 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=0, + end=40000, + by_epoch=False) +] +# training schedule for 40k +train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000) +val_cfg = 
dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/schedule_80k.py b/configs/_base_/schedules/schedule_80k.py new file mode 100644 index 0000000000000000000000000000000000000000..0dcd6c4d1bc0158107276fc9abe7d5d62c0880c2 --- /dev/null +++ b/configs/_base_/schedules/schedule_80k.py @@ -0,0 +1,24 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) +# learning policy +param_scheduler = [ + dict( + type='PolyLR', + eta_min=1e-4, + power=0.9, + begin=0, + end=80000, + by_epoch=False) +] +# training schedule for 80k +train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +default_hooks = dict( + timer=dict(type='IterTimerHook'), + logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False), + param_scheduler=dict(type='ParamSchedulerHook'), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=8000), + sampler_seed=dict(type='DistSamplerSeedHook'), + visualization=dict(type='SegVisualizationHook')) diff --git a/configs/_base_/schedules/water_schedule.py b/configs/_base_/schedules/water_schedule.py new file mode 100644 index 0000000000000000000000000000000000000000..9f54b404f4b643baaa576a8ed377599f636593b3 --- /dev/null +++ b/configs/_base_/schedules/water_schedule.py @@ -0,0 +1,43 @@ +# optimizer +optim_wrapper = dict( + type="OptimWrapper", + optimizer=dict(type="AdamW", lr=0.0006, betas=(0.9, 0.999), weight_decay=0.01), + paramwise_cfg=dict( + custom_keys={ + "absolute_pos_embed": dict(decay_mult=0.0), + "relative_position_bias_table": dict(decay_mult=0.0), + "norm": dict(decay_mult=0.0), + } + ), +) +# learning policy +param_scheduler = [ + dict(type="LinearLR", start_factor=1e-3, by_epoch=False, begin=0, end=760*5), + dict( + type="PolyLR", + eta_min=0.0, + power=0.9, + begin=760*5, + end=76000, + by_epoch=False, + ), +] +# training schedule for 40k +train_cfg = dict(type="IterBasedTrainLoop", max_iters=76000, val_interval=760) +val_cfg = dict(type="ValLoop") +test_cfg = dict(type="TestLoop") +default_hooks = dict( + timer=dict(type="IterTimerHook"), + logger=dict(type="LoggerHook", interval=760, log_metric_by_epoch=False), + param_scheduler=dict(type="ParamSchedulerHook"), + checkpoint=dict( + type="CheckpointHook", + by_epoch=False, + interval=760, + save_best=["mIoU"], + rule=["greater"], + max_keep_ckpts=1, + ), + sampler_seed=dict(type="DistSamplerSeedHook"), + visualization=dict(type="SegVisualizationHook"), +) diff --git a/configs/convnext/convnext-v2-femto_upernet.py b/configs/convnext/convnext-v2-femto_upernet.py new file mode 100644 index 0000000000000000000000000000000000000000..440e8c8b544734a28166aaf75a4d2ed0b52d2756 --- /dev/null +++ b/configs/convnext/convnext-v2-femto_upernet.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/convnext_upernet.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + 
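+    # override the base model's 19-class heads for the 5-class grass dataset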
decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5) +) \ No newline at end of file diff --git a/configs/convnext/convnext_b_upernet.py b/configs/convnext/convnext_b_upernet.py new file mode 100644 index 0000000000000000000000000000000000000000..440e8c8b544734a28166aaf75a4d2ed0b52d2756 --- /dev/null +++ b/configs/convnext/convnext_b_upernet.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/convnext_upernet.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5) +) \ No newline at end of file diff --git a/configs/deeplabv3plus/deeplabv3plus_r101.py b/configs/deeplabv3plus/deeplabv3plus_r101.py new file mode 100644 index 0000000000000000000000000000000000000000..63fe3650ede64a613b96a3034b1d2b9461e0c295 --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101.py @@ -0,0 +1,15 @@ +_base_ = [ + "../_base_/models/deeplabv3plus_r50-d8.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101), + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5) +) \ No newline at end of file diff --git a/configs/deeplabv3plus/deeplabv3plus_r101_water.py b/configs/deeplabv3plus/deeplabv3plus_r101_water.py new file mode 100644 index 0000000000000000000000000000000000000000..106480453d00d962f35fd756e1429e0d67a4895d --- /dev/null +++ b/configs/deeplabv3plus/deeplabv3plus_r101_water.py @@ -0,0 +1,15 @@ +_base_ = [ + "../_base_/models/deeplabv3plus_r50-d8.py", + "../_base_/datasets/water.py", + "../_base_/default_runtime.py", + "../_base_/schedules/water_schedule.py", +] + +data_preprocessor = dict(size=(512, 512)) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101), + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6) +) \ No newline at end of file diff --git a/configs/dinov2/dinov2_upernet.py b/configs/dinov2/dinov2_upernet.py new file mode 100644 index 0000000000000000000000000000000000000000..57947526d76888067bd10caa504ddc2e461a4fc6 --- /dev/null +++ b/configs/dinov2/dinov2_upernet.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/dinov2_upernet.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5) +) diff --git a/configs/dinov2/dinov2_upernet_focal_loss.py b/configs/dinov2/dinov2_upernet_focal_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..2dae8a9e3468392fbb9e400b59faf7ace380acb2 --- /dev/null +++ b/configs/dinov2/dinov2_upernet_focal_loss.py @@ -0,0 +1,31 @@ +_base_ = [ + "../_base_/models/dinov2_upernet.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict( + num_classes=5, + loss_decode=[ + dict(type="FocalLoss", use_sigmoid=True, loss_weight=1.0), + dict(type="CrossEntropyLoss", 
use_sigmoid=False, loss_weight=1.0), + ], + init_cfg=dict( + type="Pretrained", + checkpoint="work_dirs/dinov2_b_frozen-simpleAdapter/head.pth", + prefix="decode_head", + ), + ), + auxiliary_head=dict( + num_classes=5, + init_cfg=dict( + type="Pretrained", + checkpoint="work_dirs/dinov2_b_frozen-simpleAdapter/auxiliary_head.pth", + prefix="auxiliary_head", + ), + ), +) diff --git a/configs/dinov2/full_dinov2_upernet.py b/configs/dinov2/full_dinov2_upernet.py new file mode 100644 index 0000000000000000000000000000000000000000..8feab2d4b41764deed3051c7d97503e6e61d45d9 --- /dev/null +++ b/configs/dinov2/full_dinov2_upernet.py @@ -0,0 +1,16 @@ +_base_ = [ + "../_base_/models/dinov2_upernet.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + frozen_stages=-1 + ), + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5) +) diff --git a/configs/fcn/fcn_r101.py b/configs/fcn/fcn_r101.py new file mode 100644 index 0000000000000000000000000000000000000000..0e63ba9576e0773d15a301a8657e66c4dd5de7e7 --- /dev/null +++ b/configs/fcn/fcn_r101.py @@ -0,0 +1,15 @@ +_base_ = [ + "../_base_/models/fcn_r50-d8.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101), + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5) +) diff --git a/configs/fcn/fcn_r50.py b/configs/fcn/fcn_r50.py new file mode 100644 index 0000000000000000000000000000000000000000..66c5ba80aaf25350b942a8e70fa15fb33a8ad1ce --- /dev/null +++ b/configs/fcn/fcn_r50.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/fcn_r50-d8.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5) +) diff --git a/configs/fcn/fcn_r50_water.py b/configs/fcn/fcn_r50_water.py new file mode 100644 index 0000000000000000000000000000000000000000..84b43673e05a8e10265868464124ec913b14f59f --- /dev/null +++ b/configs/fcn/fcn_r50_water.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/fcn_r50-d8.py", + "../_base_/datasets/water.py", + "../_base_/default_runtime.py", + "../_base_/schedules/water_schedule.py", +] + +data_preprocessor = dict(size=(512, 512)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6) +) diff --git a/configs/ktda/convnextv2_femote_student_adapter_segmormer_head.py b/configs/ktda/convnextv2_femote_student_adapter_segmormer_head.py new file mode 100644 index 0000000000000000000000000000000000000000..a134f7571496523189d735f02590f5037e4299e2 --- /dev/null +++ b/configs/ktda/convnextv2_femote_student_adapter_segmormer_head.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/convnextv2_femto_vit_segformer_vegseg.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), +) diff --git 
a/configs/ktda/dinov2_b_frozen-fam-fmm_focallLoss.py b/configs/ktda/dinov2_b_frozen-fam-fmm_focallLoss.py new file mode 100644 index 0000000000000000000000000000000000000000..a68d5fcd1ccf88296601cab454a3ff17c61330aa --- /dev/null +++ b/configs/ktda/dinov2_b_frozen-fam-fmm_focallLoss.py @@ -0,0 +1,22 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict( + num_classes=5, + loss_decode=[ + dict(type="FocalLoss", use_sigmoid=True, loss_weight=1.0), + dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), + ], + ), + auxiliary_head=dict( + num_classes=5, + ), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), +) diff --git a/configs/ktda/dinov2_b_frozen-fam-fmm_frozen_stu.py b/configs/ktda/dinov2_b_frozen-fam-fmm_frozen_stu.py new file mode 100644 index 0000000000000000000000000000000000000000..d1bcb5fbe69bcbaefe28c13bd26e3b8676c632bd --- /dev/null +++ b/configs/ktda/dinov2_b_frozen-fam-fmm_frozen_stu.py @@ -0,0 +1,22 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + student_training=False, + backbone=dict( + frozen_stages=4, + ), + decode_head=dict( + num_classes=5, + ), + auxiliary_head=dict( + num_classes=5, + ), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), +) diff --git a/configs/ktda/dinov2_b_frozen-fam-fmm_not_distill.py b/configs/ktda/dinov2_b_frozen-fam-fmm_not_distill.py new file mode 100644 index 0000000000000000000000000000000000000000..dcb478a58b303d3e2364698378762f0ad6710031 --- /dev/null +++ b/configs/ktda/dinov2_b_frozen-fam-fmm_not_distill.py @@ -0,0 +1,19 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + student_training=False, + decode_head=dict( + num_classes=5, + ), + auxiliary_head=dict( + num_classes=5, + ), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), +) diff --git a/configs/ktda/experiment_ab.py b/configs/ktda/experiment_ab.py new file mode 100644 index 0000000000000000000000000000000000000000..23eb1079495158bca5199847faa1f286c7286d08 --- /dev/null +++ b/configs/ktda/experiment_ab.py @@ -0,0 +1,46 @@ +_base_ = [ + "../_base_/models/tiny_vit_segformer_vegseg.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + teach_backbone=dict( + type="mmpretrain.VisionTransformer", + arch="large", + frozen_stages=24, + img_size=256, + patch_size=14, + layer_scale_init_value=1e-5, + out_indices=(7, 11, 15, 23), + out_type="featmap", + init_cfg=dict( + type="Pretrained", + checkpoint="checkpoints/dinov2-large.pth", + prefix="backbone", + ), + ), + fam=dict(out_channels=1024), + decode_head=dict(in_channels=[1024, 1024, 1024, 1024], num_classes=5), + data_preprocessor=data_preprocessor, + auxiliary_head=[ + dict( + type="FCNHead", + in_channels=1024, + in_index=i, + channels=256, + num_convs=1, + concat_input=False, + dropout_ratio=0.1, + num_classes=5, + norm_cfg=dict(type="SyncBN", 
requires_grad=True), + align_corners=False, + loss_decode=dict( + type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4 + ), + ) + for i in range(4) + ], +) diff --git a/configs/ktda/experiment_c.py b/configs/ktda/experiment_c.py new file mode 100644 index 0000000000000000000000000000000000000000..88957be1e43636b1d4adc7ca21f75d55c8f1b741 --- /dev/null +++ b/configs/ktda/experiment_c.py @@ -0,0 +1,14 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), +) diff --git a/configs/ktda/experiment_d.py b/configs/ktda/experiment_d.py new file mode 100644 index 0000000000000000000000000000000000000000..8c15e764471e00302e40320cd4f0995919cb50f4 --- /dev/null +++ b/configs/ktda/experiment_d.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=None +) diff --git a/configs/ktda/experiment_e.py b/configs/ktda/experiment_e.py new file mode 100644 index 0000000000000000000000000000000000000000..f8fd7d9822efdaf321ad88f9942dfe41080a8466 --- /dev/null +++ b/configs/ktda/experiment_e.py @@ -0,0 +1,15 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + alpha=0, + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), +) diff --git a/configs/ktda/experiment_f.py b/configs/ktda/experiment_f.py new file mode 100644 index 0000000000000000000000000000000000000000..8f5881b8eb0b5f7a0bd4573e0423b2da3b7cf470 --- /dev/null +++ b/configs/ktda/experiment_f.py @@ -0,0 +1,15 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + alpha=1, + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), +) diff --git a/configs/ktda/experiment_g.py b/configs/ktda/experiment_g.py new file mode 100644 index 0000000000000000000000000000000000000000..81fc7a40ff73268cefb2bafecead7c9afc99d04c --- /dev/null +++ b/configs/ktda/experiment_g.py @@ -0,0 +1,18 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + teach_backbone=dict(out_indices=(11)), + backbone=dict(out_indices=(3)), + fam=dict(in_channels=[1024]), + neck=dict(in_channels=[768], scales=[1]), + decode_head=dict(num_classes=5,in_channels=[768],in_index=[0]), + auxiliary_head=dict(num_classes=5,in_index=0), + fmm=dict(type="FMM", in_channels=[768]), +) diff --git a/configs/ktda/experiment_h.py b/configs/ktda/experiment_h.py new 
file mode 100644 index 0000000000000000000000000000000000000000..07cca1b17560f2b7097fa1b67da5562cf78ca8f3 --- /dev/null +++ b/configs/ktda/experiment_h.py @@ -0,0 +1,16 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + fuse=True, + neck=dict(in_channels=[768], scales=[1]), + decode_head=dict(num_classes=5,in_channels=[768],in_index=[0]), + auxiliary_head=dict(num_classes=5,in_index=0), + fmm=dict(type="FMM", in_channels=[768]), +) diff --git a/configs/ktda/experiment_i.py b/configs/ktda/experiment_i.py new file mode 100644 index 0000000000000000000000000000000000000000..a68c4c7336bda82861ceef316dc9b31320d89c6d --- /dev/null +++ b/configs/ktda/experiment_i.py @@ -0,0 +1,14 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768],mlp_nums=2), +) diff --git a/configs/ktda/experiment_j.py b/configs/ktda/experiment_j.py new file mode 100644 index 0000000000000000000000000000000000000000..e14cf3f8775138e3c6c9399aef5cdc0744037cdb --- /dev/null +++ b/configs/ktda/experiment_j.py @@ -0,0 +1,14 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768],mlp_nums=3), +) diff --git a/configs/ktda/experiment_l.py b/configs/ktda/experiment_l.py new file mode 100644 index 0000000000000000000000000000000000000000..b4465f021fc9a0f0c071424425f3564a5f8f908e --- /dev/null +++ b/configs/ktda/experiment_l.py @@ -0,0 +1,14 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=None, + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), +) diff --git a/configs/ktda/experiment_m.py b/configs/ktda/experiment_m.py new file mode 100644 index 0000000000000000000000000000000000000000..4c3ab0cc3cdb9559b81b8c6c0e0a4928eb45cd10 --- /dev/null +++ b/configs/ktda/experiment_m.py @@ -0,0 +1,16 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict( + type="FMM", in_channels=[768, 768, 768, 768], model_type="vitBlock" + ), +) diff --git a/configs/ktda/experiment_n.py b/configs/ktda/experiment_n.py new file mode 100644 index 0000000000000000000000000000000000000000..c70aeafbfd3e3fe460f4b777864a04d0f97e4277 --- /dev/null +++ b/configs/ktda/experiment_n.py @@ -0,0 +1,19 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + 
"../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict( + type="FMM", + in_channels=[768, 768, 768, 768], + model_type="vitBlock", + mlp_nums=2, + ), +) diff --git a/configs/ktda/experiment_o.py b/configs/ktda/experiment_o.py new file mode 100644 index 0000000000000000000000000000000000000000..800c311af2a9c4693011827624cba224f8b74535 --- /dev/null +++ b/configs/ktda/experiment_o.py @@ -0,0 +1,19 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict( + type="fmm", + in_channels=[768, 768, 768, 768], + model_type="vitBlock", + mlp_nums=3, + ), +) diff --git a/configs/ktda/experiment_s.py b/configs/ktda/experiment_s.py new file mode 100644 index 0000000000000000000000000000000000000000..808e465539b645db5c9f15870f8164506472e7c0 --- /dev/null +++ b/configs/ktda/experiment_s.py @@ -0,0 +1,14 @@ +_base_ = [ + "../_base_/models/convnextv2_femto_vit_segformer_vegseg.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), +) diff --git a/configs/ktda/experiment_t.py b/configs/ktda/experiment_t.py new file mode 100644 index 0000000000000000000000000000000000000000..9db8f100fd546f7247de748293bdfcde5a513466 --- /dev/null +++ b/configs/ktda/experiment_t.py @@ -0,0 +1,14 @@ +_base_ = [ + "../_base_/models/tiny_vit_segformer_vegseg.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), +) diff --git a/configs/ktda/experiment_w.py b/configs/ktda/experiment_w.py new file mode 100644 index 0000000000000000000000000000000000000000..8c56dfbf95b51822a0a090f0b79535456fed5fd6 --- /dev/null +++ b/configs/ktda/experiment_w.py @@ -0,0 +1,32 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + backbone=dict( + _delete_=True, + type="mmpretrain.ConvNeXt", + arch="femto", + out_indices=[0, 1, 2, 3], + drop_path_rate=0.1, + layer_scale_init_value=0.0, + gap_before_final_norm=False, + use_grn=True, + init_cfg=dict( + type="Pretrained", + checkpoint="checkpoints/convnextv2_femote.pth", + prefix="backbone", + ), + ), + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), + fam=dict( + in_channels=[48, 96, 192, 384], + ), +) diff --git a/configs/ktda/experiment_x.py b/configs/ktda/experiment_x.py new file mode 100644 index 
0000000000000000000000000000000000000000..1edcde0bbba1457060397a28a70015e6d3691c85 --- /dev/null +++ b/configs/ktda/experiment_x.py @@ -0,0 +1,32 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + backbone=dict( + _delete_=True, + type="mmpretrain.TinyViT", + arch="5m", + img_size=(256, 256), + window_size=[7, 7, 14, 7], + out_indices=(0, 1, 2, 3), + drop_path_rate=0.0, + gap_before_final_norm=False, + init_cfg=dict( + type="Pretrained", + checkpoint="checkpoints/tiny_vit_5m_imagenet.pth", + prefix="backbone", + ), + ), + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), + fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]), + fam=dict( + in_channels=[128, 160, 320, 320], + ), +) diff --git a/configs/ktda/experiment_y.py b/configs/ktda/experiment_y.py new file mode 100644 index 0000000000000000000000000000000000000000..f02c283b81121d919a8c06cecab2eeb66881b8d8 --- /dev/null +++ b/configs/ktda/experiment_y.py @@ -0,0 +1,29 @@ +_base_ = [ + "../_base_/models/convnextv2_femto_vit_segformer_vegseg.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + teach_backbone=dict( + type="mmpretrain.VisionTransformer", + arch="large", + frozen_stages=24, + img_size=256, + patch_size=14, + layer_scale_init_value=1e-5, + out_indices=(7, 11, 15, 23), + out_type="featmap", + init_cfg=dict( + type="Pretrained", + checkpoint="checkpoints/dinov2-large.pth", + prefix="backbone", + ), + ), + fam=dict(out_channels=1024), + decode_head=dict(in_channels=[1024, 1024, 1024, 1024],num_classes=5), + data_preprocessor=data_preprocessor, + auxiliary_head=dict(num_classes=5,in_channels=1024), +) diff --git a/configs/ktda/experiment_z.py b/configs/ktda/experiment_z.py new file mode 100644 index 0000000000000000000000000000000000000000..da2c6ee889529ecac99e23eb478df796cc22ad5b --- /dev/null +++ b/configs/ktda/experiment_z.py @@ -0,0 +1,29 @@ +_base_ = [ + "../_base_/models/tiny_vit_segformer_vegseg.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + teach_backbone=dict( + type="mmpretrain.VisionTransformer", + arch="large", + frozen_stages=24, + img_size=256, + patch_size=14, + layer_scale_init_value=1e-5, + out_indices=(7, 11, 15, 23), + out_type="featmap", + init_cfg=dict( + type="Pretrained", + checkpoint="checkpoints/dinov2-large.pth", + prefix="backbone", + ), + ), + fam=dict(out_channels=1024), + decode_head=dict(in_channels=[1024, 1024, 1024, 1024],num_classes=5), + data_preprocessor=data_preprocessor, + auxiliary_head=dict(num_classes=5,in_channels=1024), +) diff --git a/configs/ktda/ktda_cloud.py b/configs/ktda/ktda_cloud.py new file mode 100644 index 0000000000000000000000000000000000000000..5913949aa80743bedd5fa33fd8b98532a26b894b --- /dev/null +++ b/configs/ktda/ktda_cloud.py @@ -0,0 +1,23 @@ +_base_ = [ + "../_base_/models/ktda.py", + "../_base_/datasets/l8_biome.py", + "../_base_/default_runtime.py", + "../_base_/schedules/other_dataset_scedule.py", +] + +data_preprocessor = dict(size=(512, 512)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=4), + auxiliary_head=dict(num_classes=4), + 
fam=dict(output_size=37), + fmm=dict( + type="FMM", + in_channels=[768, 768, 768, 768], + model_type="vitBlock", + mlp_nums=4, + ), +) +train_dataloader = dict(batch_size=2,num_workers=2) +val_dataloader = dict(batch_size=2,num_workers=2) +test_dataloader = dict(batch_size=2,num_workers=2) diff --git a/configs/ktda/vit_tiny_student_adapter_segformer_head.py b/configs/ktda/vit_tiny_student_adapter_segformer_head.py new file mode 100644 index 0000000000000000000000000000000000000000..7ab914466e18f02959399eea1c3bbfe5cc5c673c --- /dev/null +++ b/configs/ktda/vit_tiny_student_adapter_segformer_head.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/tiny_vit_segformer_vegseg.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), +) diff --git a/configs/mask2former/mask2former_swin_b.py b/configs/mask2former/mask2former_swin_b.py new file mode 100644 index 0000000000000000000000000000000000000000..6a0ae9aae3095651d580ba8179665cdffa4f6d11 --- /dev/null +++ b/configs/mask2former/mask2former_swin_b.py @@ -0,0 +1,12 @@ +_base_ = [ + "../_base_/models/mask2former_swin-b.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5, loss_cls=dict(class_weight=[1.0] * 5 + [0.1])), +) diff --git a/configs/mask2former/mask2former_swin_b_water.py b/configs/mask2former/mask2former_swin_b_water.py new file mode 100644 index 0000000000000000000000000000000000000000..57cfe71741c6c8620277d00ffc7c9776b95d36c8 --- /dev/null +++ b/configs/mask2former/mask2former_swin_b_water.py @@ -0,0 +1,12 @@ +_base_ = [ + "../_base_/models/mask2former_swin-b.py", + "../_base_/datasets/water.py", + "../_base_/default_runtime.py", + "../_base_/schedules/water_schedule.py", +] + +data_preprocessor = dict(size=(512, 512)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=6, loss_cls=dict(class_weight=[1.0] * 6 + [0.1])), +) diff --git a/configs/mask2former/mask2former_swin_l.py b/configs/mask2former/mask2former_swin_l.py new file mode 100644 index 0000000000000000000000000000000000000000..18ff975978c1bb405d9bc04c4cdccb2c93f0c76d --- /dev/null +++ b/configs/mask2former/mask2former_swin_l.py @@ -0,0 +1,22 @@ +_base_ = [ + "../_base_/models/mask2former_swin-b.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] +pretrained = "https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window12_384_22k_20220412-6580f57d.pth" # noqa +data_preprocessor = dict(size=(256, 256)) +model = dict( + backbone=dict( + embed_dims=192, + num_heads=[6, 12, 24, 48], + init_cfg=dict(type="Pretrained", checkpoint=pretrained), + ), + data_preprocessor=data_preprocessor, + decode_head=dict( + num_classes=5, + num_queries=100, + in_channels=[192, 384, 768, 1536], + loss_cls=dict(class_weight=[1.0] * 5 + [0.1]), + ), +) diff --git a/configs/pspnet/pspnet_r101.py b/configs/pspnet/pspnet_r101.py new file mode 100644 index 0000000000000000000000000000000000000000..767350b197180136a25a11b69fb64e3f26da5579 --- /dev/null +++ b/configs/pspnet/pspnet_r101.py @@ -0,0 +1,15 @@ +_base_ = [ + "../_base_/models/pspnet_r50-d8.py", + 
"../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + pretrained='open-mmlab://resnet101_v1c', + backbone=dict(depth=101), + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5) +) \ No newline at end of file diff --git a/configs/segformer/segformer_mit-b0.py b/configs/segformer/segformer_mit-b0.py new file mode 100644 index 0000000000000000000000000000000000000000..f54b8be76798a54747bf3c19f099520fe6374575 --- /dev/null +++ b/configs/segformer/segformer_mit-b0.py @@ -0,0 +1,14 @@ +_base_ = [ + "../_base_/models/segformer_mit-b0.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +checkpoint = "https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b0_20220624-7e0fe6dd.pth" # noqa +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict(init_cfg=dict(type="Pretrained", checkpoint=checkpoint)), + decode_head=dict(num_classes=5), +) diff --git a/configs/segformer/segformer_mit-b5.py b/configs/segformer/segformer_mit-b5.py new file mode 100644 index 0000000000000000000000000000000000000000..7192f2e29b1dd71a07ebb006480da26b5d89675e --- /dev/null +++ b/configs/segformer/segformer_mit-b5.py @@ -0,0 +1,18 @@ +_base_ = [ + "../_base_/models/segformer_mit-b0.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +checkpoint = "https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth" # noqa +model = dict( + data_preprocessor=data_preprocessor, + backbone=dict( + init_cfg=dict(type="Pretrained", checkpoint=checkpoint), + embed_dims=64, + num_layers=[3, 6, 40, 3], + ), + decode_head=dict(num_classes=5, in_channels=[64, 128, 320, 512]), +) diff --git a/configs/unet.py/unet-s5-d16_deeplabv3.py b/configs/unet.py/unet-s5-d16_deeplabv3.py new file mode 100644 index 0000000000000000000000000000000000000000..eca6d17611c6296475d5b72c6986a75d15b9ad72 --- /dev/null +++ b/configs/unet.py/unet-s5-d16_deeplabv3.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/deeplabv3_unet_s5-d16.py", + "../_base_/datasets/grass.py", + "../_base_/default_runtime.py", + "../_base_/schedules/grass_schedule.py", +] + +data_preprocessor = dict(size=(256, 256)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=5), + auxiliary_head=dict(num_classes=5), +) diff --git a/configs/unet.py/unet-s5-d16_deeplabv3_water.py b/configs/unet.py/unet-s5-d16_deeplabv3_water.py new file mode 100644 index 0000000000000000000000000000000000000000..1185631df1471b2b55b7b4fbbede6faa52970816 --- /dev/null +++ b/configs/unet.py/unet-s5-d16_deeplabv3_water.py @@ -0,0 +1,13 @@ +_base_ = [ + "../_base_/models/deeplabv3_unet_s5-d16.py", + "../_base_/datasets/water.py", + "../_base_/default_runtime.py", + "../_base_/schedules/water_schedule.py", +] + +data_preprocessor = dict(size=(512, 512)) +model = dict( + data_preprocessor=data_preprocessor, + decode_head=dict(num_classes=6), + auxiliary_head=dict(num_classes=6), +) diff --git a/requirements/albu.txt b/requirements/albu.txt new file mode 100644 index 0000000000000000000000000000000000000000..f421fbbdc472527e6010cb62a7d0236cf034f24f --- /dev/null +++ b/requirements/albu.txt @@ -0,0 +1 @@ +albumentations>=0.3.2 
--no-binary qudida,albumentations diff --git a/requirements/mminstall.txt b/requirements/mminstall.txt new file mode 100644 index 0000000000000000000000000000000000000000..5732d345bb5770bda2889be232b3d61793c0d0b9 --- /dev/null +++ b/requirements/mminstall.txt @@ -0,0 +1,2 @@ +mmcv>=2.0.0rc4,<2.2.0 +mmengine>=0.5.0,<1.0.0 diff --git a/requirements/multimodal.txt b/requirements/multimodal.txt new file mode 100644 index 0000000000000000000000000000000000000000..2195d0d9ef808810f2718f18ba99f80af88dd28d --- /dev/null +++ b/requirements/multimodal.txt @@ -0,0 +1,2 @@ +ftfy +regex diff --git a/requirements/readthedocs.txt b/requirements/readthedocs.txt new file mode 100644 index 0000000000000000000000000000000000000000..962750488496d718154ea00d1c90cc299cf05fb1 --- /dev/null +++ b/requirements/readthedocs.txt @@ -0,0 +1,6 @@ +mmcv>=2.0.0rc1,<2.1.0 +mmengine>=0.4.0,<1.0.0 +prettytable +scipy +torch +torchvision diff --git a/requirements/tests.txt b/requirements/tests.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fff2520d7cd2582e296c6cac1e57730a9fe7eba --- /dev/null +++ b/requirements/tests.txt @@ -0,0 +1,8 @@ +codecov +flake8 +ftfy +interrogate +pytest +regex +xdoctest>=0.10.0 +yapf diff --git a/tools/test.py b/tools/test.py new file mode 100644 index 0000000000000000000000000000000000000000..0d7f39b3a8b5f94d33ce8529755013451184d5ed --- /dev/null +++ b/tools/test.py @@ -0,0 +1,123 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os +import os.path as osp + +from mmengine.config import Config, DictAction +from mmengine.runner import Runner + + +# TODO: support fuse_conv_bn, visualization, and format_only +def parse_args(): + parser = argparse.ArgumentParser( + description='MMSeg test (and eval) a model') + parser.add_argument('config', help='train config file path') + parser.add_argument('checkpoint', help='checkpoint file') + parser.add_argument( + '--work-dir', + help=('if specified, the evaluation metric results will be dumped' + 'into the directory as json')) + parser.add_argument( + '--out', + type=str, + help='The directory to save output prediction for offline evaluation') + parser.add_argument( + '--show', action='store_true', help='show prediction results') + parser.add_argument( + '--show-dir', + help='directory where painted images will be saved. ' + 'If specified, it will be automatically saved ' + 'to the work_dir/timestamp/show_dir') + parser.add_argument( + '--wait-time', type=float, default=2, help='the interval of show (s)') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + parser.add_argument( + '--launcher', + choices=['none', 'pytorch', 'slurm', 'mpi'], + default='none', + help='job launcher') + parser.add_argument( + '--tta', action='store_true', help='Test time augmentation') + # When using PyTorch version >= 2.0.0, the `torch.distributed.launch` + # will pass the `--local-rank` parameter to `tools/train.py` instead + # of `--local_rank`. 
+ parser.add_argument('--local_rank', '--local-rank', type=int, default=0) + args = parser.parse_args() + if 'LOCAL_RANK' not in os.environ: + os.environ['LOCAL_RANK'] = str(args.local_rank) + + return args + + +def trigger_visualization_hook(cfg, args): + default_hooks = cfg.default_hooks + if 'visualization' in default_hooks: + visualization_hook = default_hooks['visualization'] + # Turn on visualization + visualization_hook['draw'] = True + if args.show: + visualization_hook['show'] = True + visualization_hook['wait_time'] = args.wait_time + if args.show_dir: + visualizer = cfg.visualizer + visualizer['save_dir'] = args.show_dir + else: + raise RuntimeError( + 'VisualizationHook must be included in default_hooks. ' + 'Refer to usage ' + '"visualization=dict(type=\'VisualizationHook\')"') + + return cfg + + +def main(): + args = parse_args() + + # load config + cfg = Config.fromfile(args.config) + cfg.launcher = args.launcher + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + # work_dir is determined in this priority: CLI > segment in file > filename + if args.work_dir is not None: + # update configs according to CLI args if args.work_dir is not None + cfg.work_dir = args.work_dir + elif cfg.get('work_dir', None) is None: + # use config filename as default work_dir if cfg.work_dir is None + cfg.work_dir = osp.join('./work_dirs', + osp.splitext(osp.basename(args.config))[0]) + + cfg.load_from = args.checkpoint + + if args.show or args.show_dir: + cfg = trigger_visualization_hook(cfg, args) + + if args.tta: + cfg.test_dataloader.dataset.pipeline = cfg.tta_pipeline + cfg.tta_model.module = cfg.model + cfg.model = cfg.tta_model + + # add output_dir in metric + if args.out is not None: + cfg.test_evaluator['output_dir'] = args.out + cfg.test_evaluator['keep_results'] = True + + # build the runner from config + runner = Runner.from_cfg(cfg) + + # start testing + runner.test() + + +if __name__ == '__main__': + main() diff --git a/tools/vis_model.py b/tools/vis_model.py new file mode 100644 index 0000000000000000000000000000000000000000..8aa73385f44ee3c46deaf37ca1660c63706c85c4 --- /dev/null +++ b/tools/vis_model.py @@ -0,0 +1,160 @@ +from glob import glob +import argparse +import os +from typing import Tuple, List +import numpy as np +from mmeval import MeanIoU +from PIL import Image +from matplotlib import pyplot as plt +from mmseg.apis import MMSegInferencer +from vegseg.datasets import GrassDataset + + +def get_iou(pred: np.ndarray, gt: np.ndarray, num_classes=2): + pred = pred[np.newaxis] + gt = gt[np.newaxis] + miou = MeanIoU(num_classes=num_classes) + result = miou(pred, gt) + return result["mIoU"] * 100 + + +def get_args() -> Tuple[str, str, str]: + """ + Parse command-line arguments. + return: + --models: path to the directory holding all model work dirs. + --device: device to use. + --dataset_path: dataset path. + """ + parser = argparse.ArgumentParser() + parser.add_argument("--models", type=str, default="work_dirs") + parser.add_argument("--device", type=str, default="cuda:0") + parser.add_argument("--dataset_path", type=str, default="data/grass") + args = parser.parse_args() + return args.models, args.device, args.dataset_path + + +def give_color_to_mask( + mask: Image.Image | np.ndarray, palette: List[int] +) -> Image.Image: + """ + Args: + mask: mask to color, numpy array or PIL Image. + palette: palette of dataset. + return: + mask: mask with color.
+ """ + if isinstance(mask, np.ndarray): + mask = Image.fromarray(mask) + mask = mask.convert("P") + mask.putpalette(palette) + return mask + + +def get_image_and_mask_paths( + dataset_path: str, num: int +) -> Tuple[List[str], List[str]]: + """ + get image and mask paths from dataset path. + return: + image_paths: list of image paths. + mask_paths: list of mask paths. + """ + image_paths = glob(os.path.join(dataset_path, "img_dir", "val", "*.tif")) + if num != -1: + image_paths = image_paths[:num] + mask_paths = [ + filename.replace("tif", "png").replace("img_dir", "ann_dir") + for filename in image_paths + ] + return image_paths, mask_paths + + +def get_palette() -> List[int]: + """ + get palette of dataset. + return: + palette: list of palette. + """ + palette = [] + palette_list = GrassDataset.METAINFO["palette"] + for palette_item in palette_list: + palette.extend(palette_item) + return palette + + +def init_all_models(models_path: str, device: str): + """ + init all models + Args: + models_path (str): path to all models. + device (str): device to use. + Return: + models (dict): dict of models. + """ + models = {} + all_models = os.listdir(models_path) + for model_path in all_models: + model_name = model_path + model_path = os.path.join(models_path, model_path) + config_path = glob(os.path.join(model_path, "*.py"))[0] + weight_path = glob(os.path.join(model_path, "best_mIoU_iter_*.pth"))[0] + inference = MMSegInferencer( + config_path, + weight_path, + device=device, + classes=GrassDataset.METAINFO["classes"], + palette=GrassDataset.METAINFO["palette"], + ) + models[model_name] = inference + return models + + +def main(): + models_path, device, dataset_path = get_args() + image_paths, mask_paths = get_image_and_mask_paths(dataset_path, -1) + palette = get_palette() + models = init_all_models(models_path, device) + os.makedirs("vis_results", exist_ok=True) + for image_path, mask_path in zip(image_paths, mask_paths): + result_eval = {} + result_iou = {} + mask = Image.open(mask_path) + for model_name, inference in models.items(): + predictions: np.ndarray = inference(image_path)["predictions"] + predictions = predictions.astype(np.uint8) + result_eval[model_name] = predictions + result_iou[model_name] = get_iou(predictions, np.array(mask), num_classes=5) + + # 根据iou 进行排序 + result_iou_sorted = sorted(result_iou.items(), key=lambda x: x[1], reverse=True) + plt.figure(figsize=(36, 3)) + plt.subplot(1, len(models) + 2, 1) + plt.imshow(Image.open(image_path)) + plt.axis("off") + plt.title("Input") + + plt.subplot(1, len(models) + 2, 2) + plt.imshow(give_color_to_mask(mask, palette=palette)) + plt.axis("off") + plt.title("Label") + + for i, (model_name, _) in enumerate(result_iou_sorted): + plt.subplot(1, len(models) + 2, i + 3) + plt.imshow(give_color_to_mask(result_eval[model_name], palette)) + plt.axis("off") + plt.title(f"{model_name}: {result_iou[model_name]:.2f}") + + base_name = os.path.basename(image_path).split(".")[0] + plt.savefig( + f"vis_results/{base_name}.png", + dpi=300, + bbox_inches="tight", + pad_inches=0, + ) + + +if __name__ == "__main__": + # example usage: python tools/vis_model.py --models work_dirs --device cuda:0 --dataset_path data/grass + main()