Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes.
- configs/_base_/datasets/grass.py +50 -0
- configs/_base_/datasets/l8_biome.py +62 -0
- configs/_base_/default_runtime.py +15 -0
- configs/_base_/models/convnext_upernet.py +52 -0
- configs/_base_/models/convnextv2_femto_vit_segformer_vegseg.py +78 -0
- configs/_base_/models/deeplabv3_unet_s5-d16.py +58 -0
- configs/_base_/models/deeplabv3plus_r50-d8.py +54 -0
- configs/_base_/models/dinov2_upernet.py +64 -0
- configs/_base_/models/fcn_r50-d8.py +53 -0
- configs/_base_/models/ktda.py +84 -0
- configs/_base_/models/mask2former_r50_8xb2.py +131 -0
- configs/_base_/models/mask2former_swin-b.py +158 -0
- configs/_base_/models/pspnet_r50-d8.py +52 -0
- configs/_base_/models/segformer_mit-b0.py +42 -0
- configs/_base_/models/tiny_vit_segformer_vegseg.py +78 -0
- configs/_base_/schedules/grass_schedule.py +43 -0
- configs/_base_/schedules/other_dataset_scedule.py +43 -0
- configs/_base_/schedules/schedule_160k.py +25 -0
- configs/_base_/schedules/schedule_20k.py +24 -0
- configs/_base_/schedules/schedule_240k.py +25 -0
- configs/_base_/schedules/schedule_25k.py +28 -0
- configs/_base_/schedules/schedule_320k.py +25 -0
- configs/_base_/schedules/schedule_40k.py +24 -0
- configs/_base_/schedules/schedule_80k.py +24 -0
- configs/_base_/schedules/water_schedule.py +43 -0
- configs/convnext/convnext-v2-femto_upernet.py +13 -0
- configs/convnext/convnext_b_upernet.py +13 -0
- configs/deeplabv3plus/deeplabv3plus_r101.py +15 -0
- configs/deeplabv3plus/deeplabv3plus_r101_water.py +15 -0
- configs/dinov2/dinov2_upernet.py +13 -0
- configs/dinov2/dinov2_upernet_focal_loss.py +31 -0
- configs/dinov2/full_dinov2_upernet.py +16 -0
- configs/fcn/fcn_r101.py +15 -0
- configs/fcn/fcn_r50.py +13 -0
- configs/fcn/fcn_r50_water.py +13 -0
- configs/ktda/convnextv2_femote_student_adapter_segmormer_head.py +13 -0
- configs/ktda/dinov2_b_frozen-fam-fmm_focallLoss.py +22 -0
- configs/ktda/dinov2_b_frozen-fam-fmm_frozen_stu.py +22 -0
- configs/ktda/dinov2_b_frozen-fam-fmm_not_distill.py +19 -0
- configs/ktda/experiment_ab.py +46 -0
- configs/ktda/experiment_c.py +14 -0
- configs/ktda/experiment_d.py +13 -0
- configs/ktda/experiment_e.py +15 -0
- configs/ktda/experiment_f.py +15 -0
- configs/ktda/experiment_g.py +18 -0
- configs/ktda/experiment_h.py +16 -0
- configs/ktda/experiment_i.py +14 -0
- configs/ktda/experiment_j.py +14 -0
- configs/ktda/experiment_l.py +14 -0
- configs/ktda/experiment_m.py +16 -0
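The configs above follow the usual MMSegmentation `_base_` composition pattern: each experiment file pulls in a model, a dataset, the default runtime, and a schedule. As a minimal sketch of how one of them would be consumed — assuming mmengine/mmsegmentation are installed and the repository's custom components (e.g. the KTDA `DistillEncoderDecoder`, `FAM`, and the `GrassDataset`) are importable and registered — a config could be loaded and trained roughly as follows; the work_dir value is a hypothetical output path:

from mmengine.config import Config
from mmengine.runner import Runner

# Loading the config merges every file listed in its _base_ entry.
cfg = Config.fromfile('configs/convnext/convnext_b_upernet.py')
cfg.work_dir = 'work_dirs/convnext_b_upernet'  # hypothetical output directory
runner = Runner.from_cfg(cfg)
runner.train()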
configs/_base_/datasets/grass.py
ADDED
@@ -0,0 +1,50 @@
dataset_type = 'GrassDataset'
data_root = 'data/grass'

crop_size = (256, 256)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomCrop', crop_size=crop_size),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=crop_size),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]

train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='img_dir/train',
            seg_map_path='ann_dir/train'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='img_dir/val',
            seg_map_path='ann_dir/val'),
        pipeline=test_pipeline))

test_dataloader = val_dataloader

val_evaluator = dict(type='IoUMetric', iou_metrics=["mIoU", "mDice", "mFscore"])
test_evaluator = val_evaluator
configs/_base_/datasets/l8_biome.py
ADDED
@@ -0,0 +1,62 @@
dataset_type = 'L8BIOMEDataset'
data_root = 'data/l8_biome'

crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='RandomCrop', crop_size=crop_size),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='PackSegInputs')
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=crop_size),
    # add loading annotation after ``Resize`` because ground truth
    # does not need to do resize data transform
    dict(type='LoadAnnotations'),
    dict(type='PackSegInputs')
]

train_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='InfiniteSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='img_dir/train',
            seg_map_path='ann_dir/train'),
        pipeline=train_pipeline))
val_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='img_dir/test',
            seg_map_path='ann_dir/test'),
        pipeline=test_pipeline))

test_dataloader = dict(
    batch_size=4,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            img_path='img_dir/test',
            seg_map_path='ann_dir/test'),
        pipeline=test_pipeline))
# test_dataloader = val_dataloader

val_evaluator = dict(type='IoUMetric', iou_metrics=["mIoU", "mDice", "mFscore"])
test_evaluator = val_evaluator
configs/_base_/default_runtime.py
ADDED
@@ -0,0 +1,15 @@
default_scope = 'mmseg'
env_cfg = dict(
    cudnn_benchmark=True,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='SegLocalVisualizer', vis_backends=vis_backends, name='visualizer')
log_processor = dict(by_epoch=False)
log_level = 'INFO'
load_from = None
resume = False

tta_model = dict(type='SegTTAModel')
configs/_base_/models/convnext_upernet.py
ADDED
@@ -0,0 +1,52 @@
norm_cfg = dict(type='SyncBN', requires_grad=True)
custom_imports = dict(imports='mmpretrain.models', allow_failed_imports=False)
checkpoint_file = 'checkpoints/convnext-base.pth'  # noqa
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained=None,
    backbone=dict(
        type='mmpretrain.ConvNeXt',
        arch='base',
        out_indices=[0, 1, 2, 3],
        drop_path_rate=0.4,
        layer_scale_init_value=1.0,
        gap_before_final_norm=False,
        init_cfg=dict(
            type='Pretrained', checkpoint=checkpoint_file,
            prefix='backbone.')),
    decode_head=dict(
        type='UPerHead',
        in_channels=[128, 256, 512, 1024],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=512,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/convnextv2_femto_vit_segformer_vegseg.py
ADDED
@@ -0,0 +1,78 @@
# model settings
norm_cfg = dict(type="SyncBN", requires_grad=True)
data_preprocessor = dict(
    type="SegDataPreProcessor",
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
)

model = dict(
    type="DistillEncoderDecoder",
    data_preprocessor=data_preprocessor,
    pretrained=None,
    teach_backbone=dict(
        type="mmpretrain.VisionTransformer",
        arch="base",
        frozen_stages=12,
        img_size=256,
        patch_size=14,
        layer_scale_init_value=1e-5,
        out_indices=(2, 5, 8, 11),
        out_type="featmap",
        init_cfg=dict(
            type="Pretrained",
            checkpoint="checkpoints/dinov2-base.pth",
            prefix="backbone",
        ),
    ),
    backbone=dict(
        type="mmpretrain.ConvNeXt",
        arch="femto",
        out_indices=[0, 1, 2, 3],
        drop_path_rate=0.1,
        layer_scale_init_value=0.,
        gap_before_final_norm=False,
        use_grn=True,
        init_cfg=dict(
            type="Pretrained",
            checkpoint="checkpoints/convnextv2_femote.pth",
            prefix="backbone",
        ),
    ),
    fam=dict(
        type="FAM",
        in_channels=[48, 96, 192, 384],
        out_channels=768,
        output_size=19,
    ),
    decode_head=dict(
        type="SegformerHead",
        in_channels=[768, 768, 768, 768],
        in_index=[0, 1, 2, 3],
        channels=256,
        dropout_ratio=0.1,
        num_classes=5,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
    ),
    auxiliary_head=dict(
        type="FCNHead",
        in_channels=768,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=5,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4),
    ),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode="whole"),
)
configs/_base_/models/deeplabv3_unet_s5-d16.py
ADDED
@@ -0,0 +1,58 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained=None,
    backbone=dict(
        type='UNet',
        in_channels=3,
        base_channels=64,
        num_stages=5,
        strides=(1, 1, 1, 1, 1),
        enc_num_convs=(2, 2, 2, 2, 2),
        dec_num_convs=(2, 2, 2, 2),
        downsamples=(True, True, True, True),
        enc_dilations=(1, 1, 1, 1, 1),
        dec_dilations=(1, 1, 1, 1),
        with_cp=False,
        conv_cfg=None,
        norm_cfg=norm_cfg,
        act_cfg=dict(type='ReLU'),
        upsample_cfg=dict(type='InterpConv'),
        norm_eval=False),
    decode_head=dict(
        type='ASPPHead',
        in_channels=64,
        in_index=4,
        channels=16,
        dilations=(1, 12, 24, 36),
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=128,
        in_index=3,
        channels=64,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/deeplabv3plus_r50-d8.py
ADDED
@@ -0,0 +1,54 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='DepthwiseSeparableASPPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        dilations=(1, 12, 24, 36),
        c1_in_channels=256,
        c1_channels=48,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/dinov2_upernet.py
ADDED
@@ -0,0 +1,64 @@
# model settings
norm_cfg = dict(type="SyncBN", requires_grad=True)
data_preprocessor = dict(
    type="SegDataPreProcessor",
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
)
model = dict(
    type="EncoderDecoder",
    data_preprocessor=data_preprocessor,
    pretrained=None,
    backbone=dict(
        type="mmpretrain.VisionTransformer",
        arch="base",
        frozen_stages=12,
        img_size=256,
        patch_size=14,
        layer_scale_init_value=1e-5,
        out_indices=(2, 5, 8, 11),
        out_type="featmap",
        init_cfg=dict(
            type="Pretrained",
            checkpoint="checkpoints/dinov2-base.pth",
            prefix="backbone",
        ),
    ),
    neck=dict(
        type="MultiLevelNeck",
        in_channels=[768, 768, 768, 768],
        out_channels=768,
        scales=[4, 2, 1, 0.5],
    ),
    decode_head=dict(
        type="UPerHead",
        in_channels=[768, 768, 768, 768],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
    ),
    auxiliary_head=dict(
        type="FCNHead",
        in_channels=768,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4),
    ),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode="whole"),
)
configs/_base_/models/fcn_r50-d8.py
ADDED
@@ -0,0 +1,53 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='FCNHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        num_convs=2,
        concat_input=True,
        dropout_ratio=0.1,
        num_classes=5,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/ktda.py
ADDED
@@ -0,0 +1,84 @@
# model settings
norm_cfg = dict(type="SyncBN", requires_grad=True)
data_preprocessor = dict(
    type="SegDataPreProcessor",
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
)

model = dict(
    type="DistillEncoderDecoder",
    data_preprocessor=data_preprocessor,
    pretrained=None,
    teach_backbone=dict(
        type="mmpretrain.VisionTransformer",
        arch="base",
        frozen_stages=12,
        img_size=256,
        patch_size=14,
        layer_scale_init_value=1e-5,
        out_indices=(2, 5, 8, 11),
        out_type="featmap",
        init_cfg=dict(
            type="Pretrained",
            checkpoint="checkpoints/dinov2-base.pth",
            prefix="backbone",
        ),
    ),
    backbone=dict(
        type="mmpretrain.ConvNeXt",
        arch="base",
        out_indices=[0, 1, 2, 3],
        drop_path_rate=0.4,
        layer_scale_init_value=1.0,
        gap_before_final_norm=False,
        init_cfg=dict(
            type="Pretrained",
            checkpoint="checkpoints/convnext-base.pth",
            prefix="backbone.",
        ),
    ),
    fam=dict(
        type="FAM",
        in_channels=[128, 256, 512, 1024],
        out_channels=768,
        output_size=19,
    ),
    neck=dict(
        type="MultiLevelNeck",
        in_channels=[768, 768, 768, 768],
        out_channels=768,
        scales=[4, 2, 1, 0.5],
    ),
    decode_head=dict(
        type="UPerHead",
        in_channels=[768, 768, 768, 768],
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        channels=512,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
    ),
    auxiliary_head=dict(
        type="FCNHead",
        in_channels=768,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4),
    ),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode="whole"),
)
configs/_base_/models/mask2former_r50_8xb2.py
ADDED
@@ -0,0 +1,131 @@
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
    size=(256, 256),
    test_cfg=dict(size_divisor=32))
num_classes = 5
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    backbone=dict(
        type='ResNet',
        depth=50,
        deep_stem=False,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='SyncBN', requires_grad=False),
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    decode_head=dict(
        type='Mask2FormerHead',
        in_channels=[256, 512, 1024, 2048],
        strides=[4, 8, 16, 32],
        feat_channels=256,
        out_channels=256,
        num_classes=num_classes,
        num_queries=100,
        num_transformer_feat_level=3,
        align_corners=False,
        pixel_decoder=dict(
            type='mmdet.MSDeformAttnPixelDecoder',
            num_outs=3,
            norm_cfg=dict(type='GN', num_groups=32),
            act_cfg=dict(type='ReLU'),
            encoder=dict(  # DeformableDetrTransformerEncoder
                num_layers=6,
                layer_cfg=dict(  # DeformableDetrTransformerEncoderLayer
                    self_attn_cfg=dict(  # MultiScaleDeformableAttention
                        embed_dims=256,
                        num_heads=8,
                        num_levels=3,
                        num_points=4,
                        im2col_step=64,
                        dropout=0.0,
                        batch_first=True,
                        norm_cfg=None,
                        init_cfg=None),
                    ffn_cfg=dict(
                        embed_dims=256,
                        feedforward_channels=1024,
                        num_fcs=2,
                        ffn_drop=0.0,
                        act_cfg=dict(type='ReLU', inplace=True))),
                init_cfg=None),
            positional_encoding=dict(  # SinePositionalEncoding
                num_feats=128, normalize=True),
            init_cfg=None),
        enforce_decoder_input_project=False,
        positional_encoding=dict(  # SinePositionalEncoding
            num_feats=128, normalize=True),
        transformer_decoder=dict(  # Mask2FormerTransformerDecoder
            return_intermediate=True,
            num_layers=9,
            layer_cfg=dict(  # Mask2FormerTransformerDecoderLayer
                self_attn_cfg=dict(  # MultiheadAttention
                    embed_dims=256,
                    num_heads=8,
                    attn_drop=0.0,
                    proj_drop=0.0,
                    dropout_layer=None,
                    batch_first=True),
                cross_attn_cfg=dict(  # MultiheadAttention
                    embed_dims=256,
                    num_heads=8,
                    attn_drop=0.0,
                    proj_drop=0.0,
                    dropout_layer=None,
                    batch_first=True),
                ffn_cfg=dict(
                    embed_dims=256,
                    feedforward_channels=2048,
                    num_fcs=2,
                    act_cfg=dict(type='ReLU', inplace=True),
                    ffn_drop=0.0,
                    dropout_layer=None,
                    add_identity=True)),
            init_cfg=None),
        loss_cls=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=2.0,
            reduction='mean',
            class_weight=[1.0] * num_classes + [0.1]),
        loss_mask=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            reduction='mean',
            loss_weight=5.0),
        loss_dice=dict(
            type='mmdet.DiceLoss',
            use_sigmoid=True,
            activate=True,
            reduction='mean',
            naive_dice=True,
            eps=1.0,
            loss_weight=5.0),
        train_cfg=dict(
            num_points=12544,
            oversample_ratio=3.0,
            importance_sample_ratio=0.75,
            assigner=dict(
                type='mmdet.HungarianAssigner',
                match_costs=[
                    dict(type='mmdet.ClassificationCost', weight=2.0),
                    dict(
                        type='mmdet.CrossEntropyLossCost',
                        weight=5.0,
                        use_sigmoid=True),
                    dict(
                        type='mmdet.DiceCost',
                        weight=5.0,
                        pred_act=True,
                        eps=1.0)
                ]),
            sampler=dict(type='mmdet.MaskPseudoSampler'))),
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/mask2former_swin-b.py
ADDED
@@ -0,0 +1,158 @@
pretrained = "https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_base_patch4_window12_384_20220317-55b0104a.pth"  # noqa

data_preprocessor = dict(
    type="SegDataPreProcessor",
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
    size=(256, 256),
)
num_classes = 150

depths = [2, 2, 18, 2]
model = dict(
    type="EncoderDecoder",
    data_preprocessor=data_preprocessor,
    backbone=dict(
        type="SwinTransformer",
        pretrain_img_size=384,
        embed_dims=128,
        depths=depths,
        num_heads=[4, 8, 16, 32],
        window_size=12,
        mlp_ratio=4,
        qkv_bias=True,
        qk_scale=None,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.3,
        patch_norm=True,
        out_indices=(0, 1, 2, 3),
        with_cp=False,
        frozen_stages=-1,
        init_cfg=dict(type="Pretrained", checkpoint=pretrained),
    ),
    decode_head=dict(
        type="Mask2FormerHead",
        in_channels=[128, 256, 512, 1024],
        strides=[4, 8, 16, 32],
        feat_channels=256,
        out_channels=256,
        num_classes=num_classes,
        num_queries=100,
        num_transformer_feat_level=3,
        align_corners=False,
        pixel_decoder=dict(
            type="mmdet.MSDeformAttnPixelDecoder",
            num_outs=3,
            norm_cfg=dict(type="GN", num_groups=32),
            act_cfg=dict(type="ReLU"),
            encoder=dict(  # DeformableDetrTransformerEncoder
                num_layers=6,
                layer_cfg=dict(  # DeformableDetrTransformerEncoderLayer
                    self_attn_cfg=dict(  # MultiScaleDeformableAttention
                        embed_dims=256,
                        num_heads=8,
                        num_levels=3,
                        num_points=4,
                        im2col_step=64,
                        dropout=0.0,
                        batch_first=True,
                        norm_cfg=None,
                        init_cfg=None,
                    ),
                    ffn_cfg=dict(
                        embed_dims=256,
                        feedforward_channels=1024,
                        num_fcs=2,
                        ffn_drop=0.0,
                        act_cfg=dict(type="ReLU", inplace=True),
                    ),
                ),
                init_cfg=None,
            ),
            positional_encoding=dict(  # SinePositionalEncoding
                num_feats=128, normalize=True
            ),
            init_cfg=None,
        ),
        enforce_decoder_input_project=False,
        positional_encoding=dict(  # SinePositionalEncoding
            num_feats=128, normalize=True
        ),
        transformer_decoder=dict(  # Mask2FormerTransformerDecoder
            return_intermediate=True,
            num_layers=9,
            layer_cfg=dict(  # Mask2FormerTransformerDecoderLayer
                self_attn_cfg=dict(  # MultiheadAttention
                    embed_dims=256,
                    num_heads=8,
                    attn_drop=0.0,
                    proj_drop=0.0,
                    dropout_layer=None,
                    batch_first=True,
                ),
                cross_attn_cfg=dict(  # MultiheadAttention
                    embed_dims=256,
                    num_heads=8,
                    attn_drop=0.0,
                    proj_drop=0.0,
                    dropout_layer=None,
                    batch_first=True,
                ),
                ffn_cfg=dict(
                    embed_dims=256,
                    feedforward_channels=2048,
                    num_fcs=2,
                    act_cfg=dict(type="ReLU", inplace=True),
                    ffn_drop=0.0,
                    dropout_layer=None,
                    add_identity=True,
                ),
            ),
            init_cfg=None,
        ),
        loss_cls=dict(
            type="mmdet.CrossEntropyLoss",
            use_sigmoid=False,
            loss_weight=2.0,
            reduction="mean",
            class_weight=[1.0] * num_classes + [0.1],
        ),
        loss_mask=dict(
            type="mmdet.CrossEntropyLoss",
            use_sigmoid=True,
            reduction="mean",
            loss_weight=5.0,
        ),
        loss_dice=dict(
            type="mmdet.DiceLoss",
            use_sigmoid=True,
            activate=True,
            reduction="mean",
            naive_dice=True,
            eps=1.0,
            loss_weight=5.0,
        ),
        train_cfg=dict(
            num_points=12544,
            oversample_ratio=3.0,
            importance_sample_ratio=0.75,
            assigner=dict(
                type="mmdet.HungarianAssigner",
                match_costs=[
                    dict(type="mmdet.ClassificationCost", weight=2.0),
                    dict(
                        type="mmdet.CrossEntropyLossCost", weight=5.0, use_sigmoid=True
                    ),
                    dict(type="mmdet.DiceCost", weight=5.0, pred_act=True, eps=1.0),
                ],
            ),
            sampler=dict(type="mmdet.MaskPseudoSampler"),
        ),
    ),
    train_cfg=dict(),
    test_cfg=dict(mode="whole"),
)
configs/_base_/models/pspnet_r50-d8.py
ADDED
@@ -0,0 +1,52 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        type='ResNetV1c',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        dilations=(1, 1, 2, 4),
        strides=(1, 2, 1, 1),
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        contract_dilation=True),
    decode_head=dict(
        type='PSPHead',
        in_channels=2048,
        in_index=3,
        channels=512,
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/segformer_mit-b0.py
ADDED
@@ -0,0 +1,42 @@
# model settings
norm_cfg = dict(type='SyncBN', requires_grad=True)
data_preprocessor = dict(
    type='SegDataPreProcessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255)
model = dict(
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    pretrained=None,
    backbone=dict(
        type='MixVisionTransformer',
        in_channels=3,
        embed_dims=32,
        num_stages=4,
        num_layers=[2, 2, 2, 2],
        num_heads=[1, 2, 5, 8],
        patch_sizes=[7, 3, 3, 3],
        sr_ratios=[8, 4, 2, 1],
        out_indices=(0, 1, 2, 3),
        mlp_ratio=4,
        qkv_bias=True,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.1),
    decode_head=dict(
        type='SegformerHead',
        in_channels=[32, 64, 160, 256],
        in_index=[0, 1, 2, 3],
        channels=256,
        dropout_ratio=0.1,
        num_classes=19,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
configs/_base_/models/tiny_vit_segformer_vegseg.py
ADDED
@@ -0,0 +1,78 @@
# model settings
norm_cfg = dict(type="SyncBN", requires_grad=True)
data_preprocessor = dict(
    type="SegDataPreProcessor",
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_val=0,
    seg_pad_val=255,
)

model = dict(
    type="DistillEncoderDecoder",
    data_preprocessor=data_preprocessor,
    pretrained=None,
    teach_backbone=dict(
        type="mmpretrain.VisionTransformer",
        arch="base",
        frozen_stages=12,
        img_size=256,
        patch_size=14,
        layer_scale_init_value=1e-5,
        out_indices=(2, 5, 8, 11),
        out_type="featmap",
        init_cfg=dict(
            type="Pretrained",
            checkpoint="checkpoints/dinov2-base.pth",
            prefix="backbone",
        ),
    ),
    backbone=dict(
        type="mmpretrain.TinyViT",
        arch="5m",
        img_size=(256, 256),
        window_size=[7, 7, 14, 7],
        out_indices=(0, 1, 2, 3),
        drop_path_rate=0.0,
        gap_before_final_norm=False,
        init_cfg=dict(
            type="Pretrained",
            checkpoint="checkpoints/tiny_vit_5m_imagenet.pth",
            prefix="backbone",
        ),
    ),
    fam=dict(
        type="FAM",
        in_channels=[128, 160, 320, 320],
        out_channels=768,
        output_size=19,
    ),
    decode_head=dict(
        type="SegformerHead",
        in_channels=[768, 768, 768, 768],
        in_index=[0, 1, 2, 3],
        channels=256,
        dropout_ratio=0.1,
        num_classes=5,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
    ),
    auxiliary_head=dict(
        type="FCNHead",
        in_channels=768,
        in_index=2,
        channels=256,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=5,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4),
    ),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode="whole"),
)
configs/_base_/schedules/grass_schedule.py
ADDED
@@ -0,0 +1,43 @@
# optimizer
optim_wrapper = dict(
    type="OptimWrapper",
    optimizer=dict(type="AdamW", lr=0.0006, betas=(0.9, 0.999), weight_decay=0.01),
    paramwise_cfg=dict(
        custom_keys={
            "absolute_pos_embed": dict(decay_mult=0.0),
            "relative_position_bias_table": dict(decay_mult=0.0),
            "norm": dict(decay_mult=0.0),
        }
    ),
)
# learning policy
param_scheduler = [
    dict(type="LinearLR", start_factor=1e-3, by_epoch=False, begin=0, end=230 * 5),
    dict(
        type="PolyLR",
        eta_min=0.0,
        power=0.9,
        begin=230 * 5,
        end=23000,
        by_epoch=False,
    ),
]
# training schedule for 23k iterations
train_cfg = dict(type="IterBasedTrainLoop", max_iters=23000, val_interval=230)
val_cfg = dict(type="ValLoop")
test_cfg = dict(type="TestLoop")
default_hooks = dict(
    timer=dict(type="IterTimerHook"),
    logger=dict(type="LoggerHook", interval=230, log_metric_by_epoch=False),
    param_scheduler=dict(type="ParamSchedulerHook"),
    checkpoint=dict(
        type="CheckpointHook",
        by_epoch=False,
        interval=230,
        save_best=["mIoU"],
        rule=["greater"],
        max_keep_ckpts=1,
    ),
    sampler_seed=dict(type="DistSamplerSeedHook"),
    visualization=dict(type="SegVisualizationHook"),
)
configs/_base_/schedules/other_dataset_scedule.py
ADDED
@@ -0,0 +1,43 @@
# optimizer
optim_wrapper = dict(
    type="OptimWrapper",
    optimizer=dict(type="AdamW", lr=0.0006, betas=(0.9, 0.999), weight_decay=0.01),
    paramwise_cfg=dict(
        custom_keys={
            "absolute_pos_embed": dict(decay_mult=0.0),
            "relative_position_bias_table": dict(decay_mult=0.0),
            "norm": dict(decay_mult=0.0),
        }
    ),
)
# learning policy
param_scheduler = [
    dict(type="LinearLR", start_factor=1e-3, by_epoch=False, begin=0, end=2000),
    dict(
        type="PolyLR",
        eta_min=0.0,
        power=0.9,
        begin=1000,
        end=80000,
        by_epoch=False,
    ),
]
# training schedule for 80k iterations
train_cfg = dict(type="IterBasedTrainLoop", max_iters=80000, val_interval=1000)
val_cfg = dict(type="ValLoop")
test_cfg = dict(type="TestLoop")
default_hooks = dict(
    timer=dict(type="IterTimerHook"),
    logger=dict(type="LoggerHook", interval=1000, log_metric_by_epoch=False),
    param_scheduler=dict(type="ParamSchedulerHook"),
    checkpoint=dict(
        type="CheckpointHook",
        by_epoch=False,
        interval=1000,
        save_best=["mIoU"],
        rule=["greater"],
        max_keep_ckpts=1,
    ),
    sampler_seed=dict(type="DistSamplerSeedHook"),
    visualization=dict(type="SegVisualizationHook"),
)
configs/_base_/schedules/schedule_160k.py
ADDED
@@ -0,0 +1,25 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=160000,
        by_epoch=False)
]
# training schedule for 160k
train_cfg = dict(
    type='IterBasedTrainLoop', max_iters=160000, val_interval=16000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=16000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
configs/_base_/schedules/schedule_20k.py
ADDED
@@ -0,0 +1,24 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=20000,
        by_epoch=False)
]
# training schedule for 20k
train_cfg = dict(type='IterBasedTrainLoop', max_iters=20000, val_interval=2000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
configs/_base_/schedules/schedule_240k.py
ADDED
@@ -0,0 +1,25 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=240000,
        by_epoch=False)
]
# training schedule for 240k
train_cfg = dict(
    type='IterBasedTrainLoop', max_iters=240000, val_interval=24000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=24000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
configs/_base_/schedules/schedule_25k.py
ADDED
@@ -0,0 +1,28 @@
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.1)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
    dict(
        type='LinearLR', start_factor=3e-2, begin=0, end=12000,
        by_epoch=False),
    dict(
        type='PolyLRRatio',
        eta_min_ratio=3e-2,
        power=0.9,
        begin=12000,
        end=24000,
        by_epoch=False),
    dict(type='ConstantLR', by_epoch=False, factor=1, begin=24000, end=25000)
]
# training schedule for 25k
train_cfg = dict(type='IterBasedTrainLoop', max_iters=25000, val_interval=1000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
configs/_base_/schedules/schedule_320k.py
ADDED
@@ -0,0 +1,25 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=320000,
        by_epoch=False)
]
# training schedule for 320k
train_cfg = dict(
    type='IterBasedTrainLoop', max_iters=320000, val_interval=32000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=32000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
configs/_base_/schedules/schedule_40k.py
ADDED
@@ -0,0 +1,24 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=40000,
        by_epoch=False)
]
# training schedule for 40k
train_cfg = dict(type='IterBasedTrainLoop', max_iters=40000, val_interval=4000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
configs/_base_/schedules/schedule_80k.py
ADDED
@@ -0,0 +1,24 @@
# optimizer
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0005)
optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
# learning policy
param_scheduler = [
    dict(
        type='PolyLR',
        eta_min=1e-4,
        power=0.9,
        begin=0,
        end=80000,
        by_epoch=False)
]
# training schedule for 80k
train_cfg = dict(type='IterBasedTrainLoop', max_iters=80000, val_interval=8000)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=8000),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='SegVisualizationHook'))
configs/_base_/schedules/water_schedule.py
ADDED
@@ -0,0 +1,43 @@
# optimizer
optim_wrapper = dict(
    type="OptimWrapper",
    optimizer=dict(type="AdamW", lr=0.0006, betas=(0.9, 0.999), weight_decay=0.01),
    paramwise_cfg=dict(
        custom_keys={
            "absolute_pos_embed": dict(decay_mult=0.0),
            "relative_position_bias_table": dict(decay_mult=0.0),
            "norm": dict(decay_mult=0.0),
        }
    ),
)
# learning policy
param_scheduler = [
    dict(type="LinearLR", start_factor=1e-3, by_epoch=False, begin=0, end=760 * 5),
    dict(
        type="PolyLR",
        eta_min=0.0,
        power=0.9,
        begin=760 * 5,
        end=76000,
        by_epoch=False,
    ),
]
# training schedule for 76k iterations
train_cfg = dict(type="IterBasedTrainLoop", max_iters=76000, val_interval=760)
val_cfg = dict(type="ValLoop")
test_cfg = dict(type="TestLoop")
default_hooks = dict(
    timer=dict(type="IterTimerHook"),
    logger=dict(type="LoggerHook", interval=760, log_metric_by_epoch=False),
    param_scheduler=dict(type="ParamSchedulerHook"),
    checkpoint=dict(
        type="CheckpointHook",
        by_epoch=False,
        interval=760,
        save_best=["mIoU"],
        rule=["greater"],
        max_keep_ckpts=1,
    ),
    sampler_seed=dict(type="DistSamplerSeedHook"),
    visualization=dict(type="SegVisualizationHook"),
)
configs/convnext/convnext-v2-femto_upernet.py
ADDED
@@ -0,0 +1,13 @@
_base_ = [
    "../_base_/models/convnext_upernet.py",
    "../_base_/datasets/grass.py",
    "../_base_/default_runtime.py",
    "../_base_/schedules/grass_schedule.py",
]

data_preprocessor = dict(size=(256, 256))
model = dict(
    data_preprocessor=data_preprocessor,
    decode_head=dict(num_classes=5),
    auxiliary_head=dict(num_classes=5)
)
configs/convnext/convnext_b_upernet.py
ADDED
@@ -0,0 +1,13 @@
_base_ = [
    "../_base_/models/convnext_upernet.py",
    "../_base_/datasets/grass.py",
    "../_base_/default_runtime.py",
    "../_base_/schedules/grass_schedule.py",
]

data_preprocessor = dict(size=(256, 256))
model = dict(
    data_preprocessor=data_preprocessor,
    decode_head=dict(num_classes=5),
    auxiliary_head=dict(num_classes=5)
)
configs/deeplabv3plus/deeplabv3plus_r101.py
ADDED
@@ -0,0 +1,15 @@
_base_ = [
    "../_base_/models/deeplabv3plus_r50-d8.py",
    "../_base_/datasets/grass.py",
    "../_base_/default_runtime.py",
    "../_base_/schedules/grass_schedule.py",
]

data_preprocessor = dict(size=(256, 256))
model = dict(
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet101_v1c',
    backbone=dict(depth=101),
    decode_head=dict(num_classes=5),
    auxiliary_head=dict(num_classes=5)
)
configs/deeplabv3plus/deeplabv3plus_r101_water.py
ADDED
@@ -0,0 +1,15 @@
_base_ = [
    "../_base_/models/deeplabv3plus_r50-d8.py",
    "../_base_/datasets/water.py",
    "../_base_/default_runtime.py",
    "../_base_/schedules/water_schedule.py",
]

data_preprocessor = dict(size=(512, 512))
model = dict(
    data_preprocessor=data_preprocessor,
    pretrained='open-mmlab://resnet101_v1c',
    backbone=dict(depth=101),
    decode_head=dict(num_classes=6),
    auxiliary_head=dict(num_classes=6)
)
configs/dinov2/dinov2_upernet.py
ADDED
@@ -0,0 +1,13 @@
_base_ = [
    "../_base_/models/dinov2_upernet.py",
    "../_base_/datasets/grass.py",
    "../_base_/default_runtime.py",
    "../_base_/schedules/grass_schedule.py",
]

data_preprocessor = dict(size=(256, 256))
model = dict(
    data_preprocessor=data_preprocessor,
    decode_head=dict(num_classes=5),
    auxiliary_head=dict(num_classes=5)
)
configs/dinov2/dinov2_upernet_focal_loss.py
ADDED
@@ -0,0 +1,31 @@
_base_ = [
    "../_base_/models/dinov2_upernet.py",
    "../_base_/datasets/grass.py",
    "../_base_/default_runtime.py",
    "../_base_/schedules/grass_schedule.py",
]

data_preprocessor = dict(size=(256, 256))
model = dict(
    data_preprocessor=data_preprocessor,
    decode_head=dict(
        num_classes=5,
        loss_decode=[
            dict(type="FocalLoss", use_sigmoid=True, loss_weight=1.0),
            dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
        ],
        init_cfg=dict(
            type="Pretrained",
            checkpoint="work_dirs/dinov2_b_frozen-simpleAdapter/head.pth",
            prefix="decode_head",
        ),
    ),
    auxiliary_head=dict(
        num_classes=5,
        init_cfg=dict(
            type="Pretrained",
            checkpoint="work_dirs/dinov2_b_frozen-simpleAdapter/auxiliary_head.pth",
            prefix="auxiliary_head",
        ),
    ),
)
configs/dinov2/full_dinov2_upernet.py
ADDED
@@ -0,0 +1,16 @@
+_base_ = [
+    "../_base_/models/dinov2_upernet.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    backbone=dict(
+        frozen_stages=-1
+    ),
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5)
+)
configs/fcn/fcn_r101.py
ADDED
@@ -0,0 +1,15 @@
+_base_ = [
+    "../_base_/models/fcn_r50-d8.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    pretrained='open-mmlab://resnet101_v1c',
+    backbone=dict(depth=101),
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5)
+)
configs/fcn/fcn_r50.py
ADDED
@@ -0,0 +1,13 @@
+_base_ = [
+    "../_base_/models/fcn_r50-d8.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5)
+)
configs/fcn/fcn_r50_water.py
ADDED
@@ -0,0 +1,13 @@
+_base_ = [
+    "../_base_/models/fcn_r50-d8.py",
+    "../_base_/datasets/water.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/water_schedule.py",
+]
+
+data_preprocessor = dict(size=(512, 512))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=6),
+    auxiliary_head=dict(num_classes=6)
+)
configs/ktda/convnextv2_femote_student_adapter_segmormer_head.py
ADDED
@@ -0,0 +1,13 @@
+_base_ = [
+    "../_base_/models/convnextv2_femto_vit_segformer_vegseg.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5),
+)
configs/ktda/dinov2_b_frozen-fam-fmm_focallLoss.py
ADDED
@@ -0,0 +1,22 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(
+        num_classes=5,
+        loss_decode=[
+            dict(type="FocalLoss", use_sigmoid=True, loss_weight=1.0),
+            dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0),
+        ],
+    ),
+    auxiliary_head=dict(
+        num_classes=5,
+    ),
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]),
+)
configs/ktda/dinov2_b_frozen-fam-fmm_frozen_stu.py
ADDED
@@ -0,0 +1,22 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    student_training=False,
+    backbone=dict(
+        frozen_stages=4,
+    ),
+    decode_head=dict(
+        num_classes=5,
+    ),
+    auxiliary_head=dict(
+        num_classes=5,
+    ),
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]),
+)
configs/ktda/dinov2_b_frozen-fam-fmm_not_distill.py
ADDED
@@ -0,0 +1,19 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    student_training=False,
+    decode_head=dict(
+        num_classes=5,
+    ),
+    auxiliary_head=dict(
+        num_classes=5,
+    ),
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]),
+)
configs/ktda/experiment_ab.py
ADDED
@@ -0,0 +1,46 @@
+_base_ = [
+    "../_base_/models/tiny_vit_segformer_vegseg.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    teach_backbone=dict(
+        type="mmpretrain.VisionTransformer",
+        arch="large",
+        frozen_stages=24,
+        img_size=256,
+        patch_size=14,
+        layer_scale_init_value=1e-5,
+        out_indices=(7, 11, 15, 23),
+        out_type="featmap",
+        init_cfg=dict(
+            type="Pretrained",
+            checkpoint="checkpoints/dinov2-large.pth",
+            prefix="backbone",
+        ),
+    ),
+    fam=dict(out_channels=1024),
+    decode_head=dict(in_channels=[1024, 1024, 1024, 1024], num_classes=5),
+    data_preprocessor=data_preprocessor,
+    auxiliary_head=[
+        dict(
+            type="FCNHead",
+            in_channels=1024,
+            in_index=i,
+            channels=256,
+            num_convs=1,
+            concat_input=False,
+            dropout_ratio=0.1,
+            num_classes=5,
+            norm_cfg=dict(type="SyncBN", requires_grad=True),
+            align_corners=False,
+            loss_decode=dict(
+                type="CrossEntropyLoss", use_sigmoid=False, loss_weight=0.4
+            ),
+        )
+        for i in range(4)
+    ],
+)
configs/ktda/experiment_c.py
ADDED
@@ -0,0 +1,14 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5),
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]),
+)
configs/ktda/experiment_d.py
ADDED
@@ -0,0 +1,13 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=None
+)
configs/ktda/experiment_e.py
ADDED
@@ -0,0 +1,15 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    alpha=0,
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5),
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]),
+)
configs/ktda/experiment_f.py
ADDED
@@ -0,0 +1,15 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    alpha=1,
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5),
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]),
+)
configs/ktda/experiment_g.py
ADDED
@@ -0,0 +1,18 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    teach_backbone=dict(out_indices=(11)),
+    backbone=dict(out_indices=(3)),
+    fam=dict(in_channels=[1024]),
+    neck=dict(in_channels=[768], scales=[1]),
+    decode_head=dict(num_classes=5, in_channels=[768], in_index=[0]),
+    auxiliary_head=dict(num_classes=5, in_index=0),
+    fmm=dict(type="FMM", in_channels=[768]),
+)
configs/ktda/experiment_h.py
ADDED
@@ -0,0 +1,16 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    fuse=True,
+    neck=dict(in_channels=[768], scales=[1]),
+    decode_head=dict(num_classes=5, in_channels=[768], in_index=[0]),
+    auxiliary_head=dict(num_classes=5, in_index=0),
+    fmm=dict(type="FMM", in_channels=[768]),
+)
configs/ktda/experiment_i.py
ADDED
@@ -0,0 +1,14 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5),
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768], mlp_nums=2),
+)
configs/ktda/experiment_j.py
ADDED
@@ -0,0 +1,14 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5),
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768], mlp_nums=3),
+)
configs/ktda/experiment_l.py
ADDED
@@ -0,0 +1,14 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=None,
+    fmm=dict(type="FMM", in_channels=[768, 768, 768, 768]),
+)
configs/ktda/experiment_m.py
ADDED
@@ -0,0 +1,16 @@
+_base_ = [
+    "../_base_/models/ktda.py",
+    "../_base_/datasets/grass.py",
+    "../_base_/default_runtime.py",
+    "../_base_/schedules/grass_schedule.py",
+]
+
+data_preprocessor = dict(size=(256, 256))
+model = dict(
+    data_preprocessor=data_preprocessor,
+    decode_head=dict(num_classes=5),
+    auxiliary_head=dict(num_classes=5),
+    fmm=dict(
+        type="FMM", in_channels=[768, 768, 768, 768], model_type="vitBlock"
+    ),
+)