model_configs/CVRP_DeepLabV3plus.py ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ crop_size = (
2
+ 512,
3
+ 512,
4
+ )
5
+ data_preprocessor = dict(
6
+ bgr_to_rgb=True,
7
+ mean=[
8
+ 123.675,
9
+ 116.28,
10
+ 103.53,
11
+ ],
12
+ pad_val=0,
13
+ seg_pad_val=255,
14
+ size=(
15
+ 512,
16
+ 512,
17
+ ),
18
+ std=[
19
+ 58.395,
20
+ 57.12,
21
+ 57.375,
22
+ ],
23
+ type='SegDataPreProcessor')
24
+ data_root = 'PanicleDataset/'
25
+ dataset_type = 'TzyDataset'
26
# Default runner hooks for this config.
# - checkpoint: CheckpointHook with by_epoch=False, interval=2500,
#   max_keep_ckpts=1 and save_best='mIoU' (presumably counted in
#   iterations because by_epoch is False - confirm against MMEngine).
# - logger: LoggerHook with interval=100, metrics not logged by epoch.
# - param_scheduler / sampler_seed / timer / visualization: hooks declared
#   by type only, with no extra arguments.
+ default_hooks = dict(
27
+ checkpoint=dict(
28
+ by_epoch=False,
29
+ interval=2500,
30
+ max_keep_ckpts=1,
31
+ save_best='mIoU',
32
+ type='CheckpointHook'),
33
+ logger=dict(interval=100, log_metric_by_epoch=False, type='LoggerHook'),
34
+ param_scheduler=dict(type='ParamSchedulerHook'),
35
+ sampler_seed=dict(type='DistSamplerSeedHook'),
36
+ timer=dict(type='IterTimerHook'),
37
+ visualization=dict(type='SegVisualizationHook'))
38
+ default_scope = 'mmseg'
39
+ env_cfg = dict(
40
+ cudnn_benchmark=True,
41
+ dist_cfg=dict(backend='nccl'),
42
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
43
+ img_ratios = [
44
+ 0.5,
45
+ 0.75,
46
+ 1.0,
47
+ 1.25,
48
+ 1.5,
49
+ 1.75,
50
+ ]
51
+ load_from = None
52
+ log_level = 'INFO'
53
+ log_processor = dict(by_epoch=False)
54
+ model = dict(
55
+ auxiliary_head=dict(
56
+ align_corners=False,
57
+ channels=256,
58
+ concat_input=False,
59
+ dropout_ratio=0.1,
60
+ in_channels=1024,
61
+ in_index=2,
62
+ loss_decode=dict(
63
+ loss_weight=0.4, type='CrossEntropyLoss', use_sigmoid=False),
64
+ norm_cfg=dict(requires_grad=True, type='BN'),
65
+ num_classes=2,
66
+ num_convs=1,
67
+ type='FCNHead'),
68
+ backbone=dict(
69
+ contract_dilation=True,
70
+ depth=101,
71
+ dilations=(
72
+ 1,
73
+ 1,
74
+ 2,
75
+ 4,
76
+ ),
77
+ norm_cfg=dict(requires_grad=True, type='BN'),
78
+ norm_eval=False,
79
+ num_stages=4,
80
+ out_indices=(
81
+ 0,
82
+ 1,
83
+ 2,
84
+ 3,
85
+ ),
86
+ strides=(
87
+ 1,
88
+ 2,
89
+ 1,
90
+ 1,
91
+ ),
92
+ style='pytorch',
93
+ type='ResNetV1c'),
94
+ data_preprocessor=dict(
95
+ bgr_to_rgb=True,
96
+ mean=[
97
+ 123.675,
98
+ 116.28,
99
+ 103.53,
100
+ ],
101
+ pad_val=0,
102
+ seg_pad_val=255,
103
+ size=(
104
+ 512,
105
+ 512,
106
+ ),
107
+ std=[
108
+ 58.395,
109
+ 57.12,
110
+ 57.375,
111
+ ],
112
+ type='SegDataPreProcessor'),
113
+ decode_head=dict(
114
+ align_corners=False,
115
+ c1_channels=48,
116
+ c1_in_channels=256,
117
+ channels=512,
118
+ dilations=(
119
+ 1,
120
+ 12,
121
+ 24,
122
+ 36,
123
+ ),
124
+ dropout_ratio=0.1,
125
+ in_channels=2048,
126
+ in_index=3,
127
+ loss_decode=dict(
128
+ loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False),
129
+ norm_cfg=dict(requires_grad=True, type='BN'),
130
+ num_classes=2,
131
+ type='DepthwiseSeparableASPPHead'),
132
+ pretrained='open-mmlab://resnet101_v1c',
133
+ test_cfg=dict(mode='whole'),
134
+ train_cfg=dict(),
135
+ type='EncoderDecoder')
136
+ norm_cfg = dict(requires_grad=True, type='BN')
137
+ optim_wrapper = dict(
138
+ clip_grad=None,
139
+ optimizer=dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005),
140
+ type='OptimWrapper')
141
+ optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005)
142
# Learning-rate schedule: a single iteration-based polynomial decay
# (power=0.9) down to eta_min=0.0001.
#
# `end` is set to 20000 so the decay finishes at train_cfg.max_iters
# (20000 in this config). It was previously 160000, which meant the
# schedule would have covered only the first eighth of its range by the
# time training stopped.
param_scheduler = [
    dict(
        begin=0,
        by_epoch=False,
        end=20000,
        eta_min=0.0001,
        power=0.9,
        type='PolyLR'),
]
151
+ randomness = dict(seed=0)
152
+ resume = False
153
+ test_cfg = dict(type='TestLoop')
154
+ test_dataloader = dict(
155
+ batch_size=1,
156
+ dataset=dict(
157
+ data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
158
+ data_root='PanicleDataset/',
159
+ pipeline=[
160
+ dict(type='LoadImageFromFile'),
161
+ dict(keep_ratio=True, scale=(
162
+ 2048,
163
+ 1024,
164
+ ), type='Resize'),
165
+ dict(type='LoadAnnotations'),
166
+ dict(type='PackSegInputs'),
167
+ ],
168
+ type='TzyDataset'),
169
+ num_workers=4,
170
+ persistent_workers=True,
171
+ sampler=dict(shuffle=False, type='DefaultSampler'))
172
+ test_evaluator = dict(
173
+ iou_metrics=[
174
+ 'mIoU',
175
+ 'mDice',
176
+ 'mFscore',
177
+ ], type='IoUMetric')
178
+ test_pipeline = [
179
+ dict(type='LoadImageFromFile'),
180
+ dict(keep_ratio=True, scale=(
181
+ 2048,
182
+ 1024,
183
+ ), type='Resize'),
184
+ dict(type='LoadAnnotations'),
185
+ dict(type='PackSegInputs'),
186
+ ]
187
# Training loop settings: IterBasedTrainLoop capped at max_iters=20000,
# with val_interval=500 (presumably validation every 500 iterations -
# confirm against the MMEngine loop documentation).
+ train_cfg = dict(max_iters=20000, type='IterBasedTrainLoop', val_interval=500)
188
+ train_dataloader = dict(
189
+ batch_size=4,
190
+ dataset=dict(
191
+ data_prefix=dict(
192
+ img_path='img_dir/train', seg_map_path='ann_dir/train'),
193
+ data_root='PanicleDataset/',
194
+ pipeline=[
195
+ dict(type='LoadImageFromFile'),
196
+ dict(type='LoadAnnotations'),
197
+ dict(
198
+ keep_ratio=True,
199
+ ratio_range=(
200
+ 0.5,
201
+ 2.0,
202
+ ),
203
+ scale=(
204
+ 2048,
205
+ 1024,
206
+ ),
207
+ type='RandomResize'),
208
+ dict(
209
+ cat_max_ratio=0.75, crop_size=(
210
+ 512,
211
+ 512,
212
+ ), type='RandomCrop'),
213
+ dict(prob=0.5, type='RandomFlip'),
214
+ dict(type='PhotoMetricDistortion'),
215
+ dict(type='PackSegInputs'),
216
+ ],
217
+ type='TzyDataset'),
218
+ num_workers=2,
219
+ persistent_workers=True,
220
+ sampler=dict(shuffle=True, type='InfiniteSampler'))
221
+ train_pipeline = [
222
+ dict(type='LoadImageFromFile'),
223
+ dict(type='LoadAnnotations'),
224
+ dict(
225
+ keep_ratio=True,
226
+ ratio_range=(
227
+ 0.5,
228
+ 2.0,
229
+ ),
230
+ scale=(
231
+ 2048,
232
+ 1024,
233
+ ),
234
+ type='RandomResize'),
235
+ dict(cat_max_ratio=0.75, crop_size=(
236
+ 512,
237
+ 512,
238
+ ), type='RandomCrop'),
239
+ dict(prob=0.5, type='RandomFlip'),
240
+ dict(type='PhotoMetricDistortion'),
241
+ dict(type='PackSegInputs'),
242
+ ]
243
+ tta_model = dict(type='SegTTAModel')
244
# Test-time augmentation pipeline.
#
# The image is loaded once, then TestTimeAug is given four transform
# groups: six Resize scale factors (0.5 .. 1.75), two horizontal
# RandomFlip settings (prob 0.0 and 1.0), annotation loading and packing.
#
# The loader no longer passes `file_client_args=dict(backend='disk')`, so
# it is declared the same way as the LoadImageFromFile entries in the
# other pipelines of this config. NOTE(review): `file_client_args` is
# understood to be deprecated in MMCV 2.x in favour of `backend_args`,
# with local files read by default - confirm against the MMCV docs.
tta_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        transforms=[
            [
                dict(keep_ratio=True, scale_factor=0.5, type='Resize'),
                dict(keep_ratio=True, scale_factor=0.75, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.0, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.25, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.5, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.75, type='Resize'),
            ],
            [
                dict(direction='horizontal', prob=0.0, type='RandomFlip'),
                dict(direction='horizontal', prob=1.0, type='RandomFlip'),
            ],
            [
                dict(type='LoadAnnotations'),
            ],
            [
                dict(type='PackSegInputs'),
            ],
        ],
        type='TestTimeAug'),
]
269
+ val_cfg = dict(type='ValLoop')
270
+ val_dataloader = dict(
271
+ batch_size=1,
272
+ dataset=dict(
273
+ data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
274
+ data_root='PanicleDataset/',
275
+ pipeline=[
276
+ dict(type='LoadImageFromFile'),
277
+ dict(keep_ratio=True, scale=(
278
+ 2048,
279
+ 1024,
280
+ ), type='Resize'),
281
+ dict(type='LoadAnnotations'),
282
+ dict(type='PackSegInputs'),
283
+ ],
284
+ type='TzyDataset'),
285
+ num_workers=4,
286
+ persistent_workers=True,
287
+ sampler=dict(shuffle=False, type='DefaultSampler'))
288
+ val_evaluator = dict(
289
+ iou_metrics=[
290
+ 'mIoU',
291
+ 'mDice',
292
+ 'mFscore',
293
+ ], type='IoUMetric')
294
+ vis_backends = [
295
+ dict(type='LocalVisBackend'),
296
+ ]
297
+ visualizer = dict(
298
+ name='visualizer',
299
+ type='SegLocalVisualizer',
300
+ vis_backends=[
301
+ dict(type='LocalVisBackend'),
302
+ ])
303
+ work_dir = './work_dirs/TzyDataset-DeepLabV3plus-0725'
model_configs/CVRP_KNet.py ADDED
@@ -0,0 +1,404 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint_file = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220308-d5bdebaf.pth'
2
+ conv_kernel_size = 1
3
+ crop_size = (
4
+ 512,
5
+ 512,
6
+ )
7
+ data_preprocessor = dict(
8
+ bgr_to_rgb=True,
9
+ mean=[
10
+ 123.675,
11
+ 116.28,
12
+ 103.53,
13
+ ],
14
+ pad_val=0,
15
+ seg_pad_val=255,
16
+ size=(
17
+ 512,
18
+ 512,
19
+ ),
20
+ std=[
21
+ 58.395,
22
+ 57.12,
23
+ 57.375,
24
+ ],
25
+ type='SegDataPreProcessor')
26
+ data_root = 'PanicleDataset/'
27
+ dataset_type = 'TzyDataset'
28
+ default_hooks = dict(
29
+ checkpoint=dict(
30
+ by_epoch=False,
31
+ interval=2500,
32
+ max_keep_ckpts=1,
33
+ save_best='mIoU',
34
+ type='CheckpointHook'),
35
+ logger=dict(interval=100, log_metric_by_epoch=False, type='LoggerHook'),
36
+ param_scheduler=dict(type='ParamSchedulerHook'),
37
+ sampler_seed=dict(type='DistSamplerSeedHook'),
38
+ timer=dict(type='IterTimerHook'),
39
+ visualization=dict(type='SegVisualizationHook'))
40
+ default_scope = 'mmseg'
41
+ env_cfg = dict(
42
+ cudnn_benchmark=True,
43
+ dist_cfg=dict(backend='nccl'),
44
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
45
+ img_ratios = [
46
+ 0.5,
47
+ 0.75,
48
+ 1.0,
49
+ 1.25,
50
+ 1.5,
51
+ 1.75,
52
+ ]
53
+ load_from = None
54
+ log_level = 'INFO'
55
+ log_processor = dict(by_epoch=False)
56
# K-Net segmentor definition.
#
# Structure, as declared by the `type` fields below:
#   * EncoderDecoder wrapper with a SwinTransformer backbone
#     (embed_dims=192, depths 2/2/18/2, window_size=7).
#   * IterativeDecodeHead: a UPerHead kernel generator followed by
#     num_stages=3 KernelUpdateHead stages.
#   * FCNHead auxiliary head on backbone output index 2 (loss weight 0.4).
#
# Every head declares num_classes=2. The three KernelUpdateHead stages
# previously declared num_classes=150 (presumably a leftover from the
# upstream K-Net config - confirm) while the kernel generator and the
# auxiliary head already used 2; they are now consistent.
#
# The three update stages are identical, so they are generated from one
# template; each stage is still a separate dict object.
model = dict(
    auxiliary_head=dict(
        align_corners=False,
        channels=256,
        concat_input=False,
        dropout_ratio=0.1,
        in_channels=768,
        in_index=2,
        loss_decode=dict(
            loss_weight=0.4, type='CrossEntropyLoss', use_sigmoid=False),
        norm_cfg=dict(requires_grad=True, type='SyncBN'),
        num_classes=2,
        num_convs=1,
        type='FCNHead'),
    backbone=dict(
        attn_drop_rate=0.0,
        depths=[
            2,
            2,
            18,
            2,
        ],
        drop_path_rate=0.3,
        drop_rate=0.0,
        embed_dims=192,
        mlp_ratio=4,
        num_heads=[
            6,
            12,
            24,
            48,
        ],
        out_indices=(
            0,
            1,
            2,
            3,
        ),
        patch_norm=True,
        qk_scale=None,
        qkv_bias=True,
        type='SwinTransformer',
        use_abs_pos_embed=False,
        window_size=7),
    data_preprocessor=dict(
        bgr_to_rgb=True,
        mean=[
            123.675,
            116.28,
            103.53,
        ],
        pad_val=0,
        seg_pad_val=255,
        size=(
            512,
            512,
        ),
        std=[
            58.395,
            57.12,
            57.375,
        ],
        type='SegDataPreProcessor'),
    decode_head=dict(
        kernel_generate_head=dict(
            align_corners=False,
            channels=512,
            dropout_ratio=0.1,
            in_channels=[
                192,
                384,
                768,
                1536,
            ],
            in_index=[
                0,
                1,
                2,
                3,
            ],
            loss_decode=dict(
                loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False),
            norm_cfg=dict(requires_grad=True, type='SyncBN'),
            num_classes=2,
            pool_scales=(
                1,
                2,
                3,
                6,
            ),
            type='UPerHead'),
        # One entry per stage; the count must equal num_stages below.
        kernel_update_head=[
            dict(
                conv_kernel_size=1,
                dropout=0.0,
                feat_transform_cfg=dict(
                    act_cfg=None, conv_cfg=dict(type='Conv2d')),
                feedforward_channels=2048,
                ffn_act_cfg=dict(inplace=True, type='ReLU'),
                in_channels=512,
                kernel_updator_cfg=dict(
                    act_cfg=dict(inplace=True, type='ReLU'),
                    feat_channels=256,
                    in_channels=256,
                    norm_cfg=dict(type='LN'),
                    out_channels=256,
                    type='KernelUpdator'),
                num_classes=2,
                num_ffn_fcs=2,
                num_heads=8,
                num_mask_fcs=1,
                out_channels=512,
                type='KernelUpdateHead',
                with_ffn=True) for _ in range(3)
        ],
        num_stages=3,
        type='IterativeDecodeHead'),
    pretrained=
    'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window7_224_22k_20220308-d5bdebaf.pth',
    test_cfg=dict(mode='whole'),
    train_cfg=dict(),
    type='EncoderDecoder')
222
# Top-level normalisation config (BN with trainable parameters).
# NOTE(review): the heads inside `model` in this config set their own
# norm_cfg inline with type='SyncBN', so this value differs from the one
# they declare - presumably a leftover from a base config; confirm.
+ norm_cfg = dict(requires_grad=True, type='BN')
223
+ num_stages = 3
224
+ optim_wrapper = dict(
225
+ clip_grad=dict(max_norm=1, norm_type=2),
226
+ optimizer=dict(
227
+ betas=(
228
+ 0.9,
229
+ 0.999,
230
+ ), lr=6e-05, type='AdamW', weight_decay=0.0005),
231
+ paramwise_cfg=dict(
232
+ custom_keys=dict(
233
+ absolute_pos_embed=dict(decay_mult=0.0),
234
+ norm=dict(decay_mult=0.0),
235
+ relative_position_bias_table=dict(decay_mult=0.0))),
236
+ type='OptimWrapper')
237
# Top-level optimizer dict (SGD, lr=0.01, momentum=0.9, weight_decay=0.0005).
# NOTE(review): this differs from optim_wrapper.optimizer defined just
# above in this config (AdamW, lr=6e-05). Presumably only optim_wrapper is
# consumed by the runner and this is a leftover - confirm.
+ optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005)
238
# Learning-rate schedule, both stages iteration-based (by_epoch=False):
#   1. LinearLR from begin=0 to end=1000 with start_factor=0.001.
#   2. MultiStepLR from begin=1000 to end=80000, milestones 60000 and 72000.
# NOTE(review): train_cfg in this config sets max_iters=20000, which is
# below both milestones and below end=80000. If training stops at
# max_iters, neither milestone would be reached - confirm whether the
# milestones should be rescaled to the 20000-iteration run.
+ param_scheduler = [
239
+ dict(
240
+ begin=0, by_epoch=False, end=1000, start_factor=0.001,
241
+ type='LinearLR'),
242
+ dict(
243
+ begin=1000,
244
+ by_epoch=False,
245
+ end=80000,
246
+ milestones=[
247
+ 60000,
248
+ 72000,
249
+ ],
250
+ type='MultiStepLR'),
251
+ ]
252
+ randomness = dict(seed=0)
253
+ resume = False
254
+ test_cfg = dict(type='TestLoop')
255
+ test_dataloader = dict(
256
+ batch_size=1,
257
+ dataset=dict(
258
+ data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
259
+ data_root='PanicleDataset/',
260
+ pipeline=[
261
+ dict(type='LoadImageFromFile'),
262
+ dict(keep_ratio=True, scale=(
263
+ 2048,
264
+ 1024,
265
+ ), type='Resize'),
266
+ dict(type='LoadAnnotations'),
267
+ dict(type='PackSegInputs'),
268
+ ],
269
+ type='TzyDataset'),
270
+ num_workers=4,
271
+ persistent_workers=True,
272
+ sampler=dict(shuffle=False, type='DefaultSampler'))
273
+ test_evaluator = dict(
274
+ iou_metrics=[
275
+ 'mIoU',
276
+ 'mDice',
277
+ 'mFscore',
278
+ ], type='IoUMetric')
279
+ test_pipeline = [
280
+ dict(type='LoadImageFromFile'),
281
+ dict(keep_ratio=True, scale=(
282
+ 2048,
283
+ 1024,
284
+ ), type='Resize'),
285
+ dict(type='LoadAnnotations'),
286
+ dict(type='PackSegInputs'),
287
+ ]
288
+ train_cfg = dict(max_iters=20000, type='IterBasedTrainLoop', val_interval=500)
289
+ train_dataloader = dict(
290
+ batch_size=2,
291
+ dataset=dict(
292
+ data_prefix=dict(
293
+ img_path='img_dir/train', seg_map_path='ann_dir/train'),
294
+ data_root='PanicleDataset/',
295
+ pipeline=[
296
+ dict(type='LoadImageFromFile'),
297
+ dict(type='LoadAnnotations'),
298
+ dict(
299
+ keep_ratio=True,
300
+ ratio_range=(
301
+ 0.5,
302
+ 2.0,
303
+ ),
304
+ scale=(
305
+ 2048,
306
+ 1024,
307
+ ),
308
+ type='RandomResize'),
309
+ dict(
310
+ cat_max_ratio=0.75, crop_size=(
311
+ 512,
312
+ 512,
313
+ ), type='RandomCrop'),
314
+ dict(prob=0.5, type='RandomFlip'),
315
+ dict(type='PhotoMetricDistortion'),
316
+ dict(type='PackSegInputs'),
317
+ ],
318
+ type='TzyDataset'),
319
+ num_workers=2,
320
+ persistent_workers=True,
321
+ sampler=dict(shuffle=True, type='InfiniteSampler'))
322
+ train_pipeline = [
323
+ dict(type='LoadImageFromFile'),
324
+ dict(type='LoadAnnotations'),
325
+ dict(
326
+ keep_ratio=True,
327
+ ratio_range=(
328
+ 0.5,
329
+ 2.0,
330
+ ),
331
+ scale=(
332
+ 2048,
333
+ 1024,
334
+ ),
335
+ type='RandomResize'),
336
+ dict(cat_max_ratio=0.75, crop_size=(
337
+ 512,
338
+ 512,
339
+ ), type='RandomCrop'),
340
+ dict(prob=0.5, type='RandomFlip'),
341
+ dict(type='PhotoMetricDistortion'),
342
+ dict(type='PackSegInputs'),
343
+ ]
344
+ tta_model = dict(type='SegTTAModel')
345
# Test-time augmentation pipeline.
#
# The image is loaded once, then TestTimeAug is given four transform
# groups: six Resize scale factors (0.5 .. 1.75), two horizontal
# RandomFlip settings (prob 0.0 and 1.0), annotation loading and packing.
#
# The loader no longer passes `file_client_args=dict(backend='disk')`, so
# it is declared the same way as the LoadImageFromFile entries in the
# other pipelines of this config. NOTE(review): `file_client_args` is
# understood to be deprecated in MMCV 2.x in favour of `backend_args`,
# with local files read by default - confirm against the MMCV docs.
tta_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        transforms=[
            [
                dict(keep_ratio=True, scale_factor=0.5, type='Resize'),
                dict(keep_ratio=True, scale_factor=0.75, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.0, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.25, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.5, type='Resize'),
                dict(keep_ratio=True, scale_factor=1.75, type='Resize'),
            ],
            [
                dict(direction='horizontal', prob=0.0, type='RandomFlip'),
                dict(direction='horizontal', prob=1.0, type='RandomFlip'),
            ],
            [
                dict(type='LoadAnnotations'),
            ],
            [
                dict(type='PackSegInputs'),
            ],
        ],
        type='TestTimeAug'),
]
370
+ val_cfg = dict(type='ValLoop')
371
+ val_dataloader = dict(
372
+ batch_size=1,
373
+ dataset=dict(
374
+ data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
375
+ data_root='PanicleDataset/',
376
+ pipeline=[
377
+ dict(type='LoadImageFromFile'),
378
+ dict(keep_ratio=True, scale=(
379
+ 2048,
380
+ 1024,
381
+ ), type='Resize'),
382
+ dict(type='LoadAnnotations'),
383
+ dict(type='PackSegInputs'),
384
+ ],
385
+ type='TzyDataset'),
386
+ num_workers=4,
387
+ persistent_workers=True,
388
+ sampler=dict(shuffle=False, type='DefaultSampler'))
389
+ val_evaluator = dict(
390
+ iou_metrics=[
391
+ 'mIoU',
392
+ 'mDice',
393
+ 'mFscore',
394
+ ], type='IoUMetric')
395
+ vis_backends = [
396
+ dict(type='LocalVisBackend'),
397
+ ]
398
+ visualizer = dict(
399
+ name='visualizer',
400
+ type='SegLocalVisualizer',
401
+ vis_backends=[
402
+ dict(type='LocalVisBackend'),
403
+ ])
404
+ work_dir = './work_dirs/TzyDataset-KNet-0721'
model_configs/CVRP_Mask2Former.py ADDED
@@ -0,0 +1,572 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ auto_scale_lr = dict(base_batch_size=16, enable=False)
2
+ backbone_embed_multi = dict(decay_mult=0.0, lr_mult=0.1)
3
+ backbone_norm_multi = dict(decay_mult=0.0, lr_mult=0.1)
4
+ crop_size = (
5
+ 512,
6
+ 512,
7
+ )
8
+ custom_keys = dict({
9
+ 'absolute_pos_embed':
10
+ dict(decay_mult=0.0, lr_mult=0.1),
11
+ 'backbone':
12
+ dict(decay_mult=1.0, lr_mult=0.1),
13
+ 'backbone.norm':
14
+ dict(decay_mult=0.0, lr_mult=0.1),
15
+ 'backbone.patch_embed.norm':
16
+ dict(decay_mult=0.0, lr_mult=0.1),
17
+ 'backbone.stages.0.blocks.0.norm':
18
+ dict(decay_mult=0.0, lr_mult=0.1),
19
+ 'backbone.stages.0.blocks.1.norm':
20
+ dict(decay_mult=0.0, lr_mult=0.1),
21
+ 'backbone.stages.0.downsample.norm':
22
+ dict(decay_mult=0.0, lr_mult=0.1),
23
+ 'backbone.stages.1.blocks.0.norm':
24
+ dict(decay_mult=0.0, lr_mult=0.1),
25
+ 'backbone.stages.1.blocks.1.norm':
26
+ dict(decay_mult=0.0, lr_mult=0.1),
27
+ 'backbone.stages.1.downsample.norm':
28
+ dict(decay_mult=0.0, lr_mult=0.1),
29
+ 'backbone.stages.2.blocks.0.norm':
30
+ dict(decay_mult=0.0, lr_mult=0.1),
31
+ 'backbone.stages.2.blocks.1.norm':
32
+ dict(decay_mult=0.0, lr_mult=0.1),
33
+ 'backbone.stages.2.blocks.10.norm':
34
+ dict(decay_mult=0.0, lr_mult=0.1),
35
+ 'backbone.stages.2.blocks.11.norm':
36
+ dict(decay_mult=0.0, lr_mult=0.1),
37
+ 'backbone.stages.2.blocks.12.norm':
38
+ dict(decay_mult=0.0, lr_mult=0.1),
39
+ 'backbone.stages.2.blocks.13.norm':
40
+ dict(decay_mult=0.0, lr_mult=0.1),
41
+ 'backbone.stages.2.blocks.14.norm':
42
+ dict(decay_mult=0.0, lr_mult=0.1),
43
+ 'backbone.stages.2.blocks.15.norm':
44
+ dict(decay_mult=0.0, lr_mult=0.1),
45
+ 'backbone.stages.2.blocks.16.norm':
46
+ dict(decay_mult=0.0, lr_mult=0.1),
47
+ 'backbone.stages.2.blocks.17.norm':
48
+ dict(decay_mult=0.0, lr_mult=0.1),
49
+ 'backbone.stages.2.blocks.2.norm':
50
+ dict(decay_mult=0.0, lr_mult=0.1),
51
+ 'backbone.stages.2.blocks.3.norm':
52
+ dict(decay_mult=0.0, lr_mult=0.1),
53
+ 'backbone.stages.2.blocks.4.norm':
54
+ dict(decay_mult=0.0, lr_mult=0.1),
55
+ 'backbone.stages.2.blocks.5.norm':
56
+ dict(decay_mult=0.0, lr_mult=0.1),
57
+ 'backbone.stages.2.blocks.6.norm':
58
+ dict(decay_mult=0.0, lr_mult=0.1),
59
+ 'backbone.stages.2.blocks.7.norm':
60
+ dict(decay_mult=0.0, lr_mult=0.1),
61
+ 'backbone.stages.2.blocks.8.norm':
62
+ dict(decay_mult=0.0, lr_mult=0.1),
63
+ 'backbone.stages.2.blocks.9.norm':
64
+ dict(decay_mult=0.0, lr_mult=0.1),
65
+ 'backbone.stages.2.downsample.norm':
66
+ dict(decay_mult=0.0, lr_mult=0.1),
67
+ 'backbone.stages.3.blocks.0.norm':
68
+ dict(decay_mult=0.0, lr_mult=0.1),
69
+ 'backbone.stages.3.blocks.1.norm':
70
+ dict(decay_mult=0.0, lr_mult=0.1),
71
+ 'level_embed':
72
+ dict(decay_mult=0.0, lr_mult=1.0),
73
+ 'query_embed':
74
+ dict(decay_mult=0.0, lr_mult=1.0),
75
+ 'query_feat':
76
+ dict(decay_mult=0.0, lr_mult=1.0),
77
+ 'relative_position_bias_table':
78
+ dict(decay_mult=0.0, lr_mult=0.1)
79
+ })
80
# Top-level data preprocessor settings: BGR->RGB conversion, per-channel
# mean/std normalisation, image padding value 0 and segmentation-map
# padding value 255.
#
# `size` is (512, 512) so that it agrees with `crop_size` and with
# model.data_preprocessor.size elsewhere in this config. It was previously
# (640, 640), presumably a leftover from a 640x640 base config - confirm.
data_preprocessor = dict(
    bgr_to_rgb=True,
    mean=[
        123.675,
        116.28,
        103.53,
    ],
    pad_val=0,
    seg_pad_val=255,
    size=(
        512,
        512,
    ),
    std=[
        58.395,
        57.12,
        57.375,
    ],
    type='SegDataPreProcessor')
99
+ data_root = 'PanicleDataset/'
100
+ dataset_type = 'TzyDataset'
101
+ default_hooks = dict(
102
+ checkpoint=dict(
103
+ by_epoch=False,
104
+ interval=2500,
105
+ max_keep_ckpts=1,
106
+ save_best='mIoU',
107
+ type='CheckpointHook'),
108
+ logger=dict(interval=100, log_metric_by_epoch=False, type='LoggerHook'),
109
+ param_scheduler=dict(type='ParamSchedulerHook'),
110
+ sampler_seed=dict(type='DistSamplerSeedHook'),
111
+ timer=dict(type='IterTimerHook'),
112
+ visualization=dict(type='SegVisualizationHook'))
113
+ default_scope = 'mmseg'
114
+ depths = [
115
+ 2,
116
+ 2,
117
+ 18,
118
+ 2,
119
+ ]
120
+ embed_multi = dict(decay_mult=0.0, lr_mult=1.0)
121
+ env_cfg = dict(
122
+ cudnn_benchmark=True,
123
+ dist_cfg=dict(backend='nccl'),
124
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
125
+ img_ratios = [
126
+ 0.5,
127
+ 0.75,
128
+ 1.0,
129
+ 1.25,
130
+ 1.5,
131
+ 1.75,
132
+ ]
133
+ load_from = None
134
+ log_level = 'INFO'
135
+ log_processor = dict(by_epoch=False)
136
+ model = dict(
137
+ backbone=dict(
138
+ attn_drop_rate=0.0,
139
+ depths=[
140
+ 2,
141
+ 2,
142
+ 18,
143
+ 2,
144
+ ],
145
+ drop_path_rate=0.3,
146
+ drop_rate=0.0,
147
+ embed_dims=192,
148
+ frozen_stages=-1,
149
+ init_cfg=dict(
150
+ checkpoint=
151
+ 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window12_384_22k_20220412-6580f57d.pth',
152
+ type='Pretrained'),
153
+ mlp_ratio=4,
154
+ num_heads=[
155
+ 6,
156
+ 12,
157
+ 24,
158
+ 48,
159
+ ],
160
+ out_indices=(
161
+ 0,
162
+ 1,
163
+ 2,
164
+ 3,
165
+ ),
166
+ patch_norm=True,
167
+ pretrain_img_size=384,
168
+ qk_scale=None,
169
+ qkv_bias=True,
170
+ type='SwinTransformer',
171
+ window_size=12,
172
+ with_cp=False),
173
+ data_preprocessor=dict(
174
+ bgr_to_rgb=True,
175
+ mean=[
176
+ 123.675,
177
+ 116.28,
178
+ 103.53,
179
+ ],
180
+ pad_val=0,
181
+ seg_pad_val=255,
182
+ size=(
183
+ 512,
184
+ 512,
185
+ ),
186
+ std=[
187
+ 58.395,
188
+ 57.12,
189
+ 57.375,
190
+ ],
191
+ type='SegDataPreProcessor'),
192
+ decode_head=dict(
193
+ align_corners=False,
194
+ enforce_decoder_input_project=False,
195
+ feat_channels=256,
196
+ in_channels=[
197
+ 192,
198
+ 384,
199
+ 768,
200
+ 1536,
201
+ ],
202
+ loss_cls=dict(
203
+ class_weight=[
204
+ 1.0,
205
+ 1.0,
206
+ 0.1,
207
+ ],
208
+ loss_weight=2.0,
209
+ reduction='mean',
210
+ type='mmdet.CrossEntropyLoss',
211
+ use_sigmoid=False),
212
+ loss_dice=dict(
213
+ activate=True,
214
+ eps=1.0,
215
+ loss_weight=5.0,
216
+ naive_dice=True,
217
+ reduction='mean',
218
+ type='mmdet.DiceLoss',
219
+ use_sigmoid=True),
220
+ loss_mask=dict(
221
+ loss_weight=5.0,
222
+ reduction='mean',
223
+ type='mmdet.CrossEntropyLoss',
224
+ use_sigmoid=True),
225
+ num_classes=2,
226
+ num_queries=100,
227
+ num_transformer_feat_level=3,
228
+ out_channels=256,
229
+ pixel_decoder=dict(
230
+ act_cfg=dict(type='ReLU'),
231
+ encoder=dict(
232
+ init_cfg=None,
233
+ layer_cfg=dict(
234
+ ffn_cfg=dict(
235
+ act_cfg=dict(inplace=True, type='ReLU'),
236
+ embed_dims=256,
237
+ feedforward_channels=1024,
238
+ ffn_drop=0.0,
239
+ num_fcs=2),
240
+ self_attn_cfg=dict(
241
+ batch_first=True,
242
+ dropout=0.0,
243
+ embed_dims=256,
244
+ im2col_step=64,
245
+ init_cfg=None,
246
+ norm_cfg=None,
247
+ num_heads=8,
248
+ num_levels=3,
249
+ num_points=4)),
250
+ num_layers=6),
251
+ init_cfg=None,
252
+ norm_cfg=dict(num_groups=32, type='GN'),
253
+ num_outs=3,
254
+ positional_encoding=dict(normalize=True, num_feats=128),
255
+ type='mmdet.MSDeformAttnPixelDecoder'),
256
+ positional_encoding=dict(normalize=True, num_feats=128),
257
+ strides=[
258
+ 4,
259
+ 8,
260
+ 16,
261
+ 32,
262
+ ],
263
+ train_cfg=dict(
264
+ assigner=dict(
265
+ match_costs=[
266
+ dict(type='mmdet.ClassificationCost', weight=2.0),
267
+ dict(
268
+ type='mmdet.CrossEntropyLossCost',
269
+ use_sigmoid=True,
270
+ weight=5.0),
271
+ dict(
272
+ eps=1.0,
273
+ pred_act=True,
274
+ type='mmdet.DiceCost',
275
+ weight=5.0),
276
+ ],
277
+ type='mmdet.HungarianAssigner'),
278
+ importance_sample_ratio=0.75,
279
+ num_points=12544,
280
+ oversample_ratio=3.0,
281
+ sampler=dict(type='mmdet.MaskPseudoSampler')),
282
+ transformer_decoder=dict(
283
+ init_cfg=None,
284
+ layer_cfg=dict(
285
+ cross_attn_cfg=dict(
286
+ attn_drop=0.0,
287
+ batch_first=True,
288
+ dropout_layer=None,
289
+ embed_dims=256,
290
+ num_heads=8,
291
+ proj_drop=0.0),
292
+ ffn_cfg=dict(
293
+ act_cfg=dict(inplace=True, type='ReLU'),
294
+ add_identity=True,
295
+ dropout_layer=None,
296
+ embed_dims=256,
297
+ feedforward_channels=2048,
298
+ ffn_drop=0.0,
299
+ num_fcs=2),
300
+ self_attn_cfg=dict(
301
+ attn_drop=0.0,
302
+ batch_first=True,
303
+ dropout_layer=None,
304
+ embed_dims=256,
305
+ num_heads=8,
306
+ proj_drop=0.0)),
307
+ num_layers=9,
308
+ return_intermediate=True),
309
+ type='Mask2FormerHead'),
310
+ test_cfg=dict(mode='whole'),
311
+ train_cfg=dict(),
312
+ type='EncoderDecoder')
313
+ norm_cfg = dict(requires_grad=True, type='BN')
314
# Number of segmentation classes. Set to 2 to agree with
# model.decode_head.num_classes in this config (whose loss_cls.class_weight
# has three entries). It was previously 150, presumably a leftover from a
# base config - confirm.
num_classes = 2
315
+ optim_wrapper = dict(
316
+ clip_grad=dict(max_norm=0.01, norm_type=2),
317
+ optimizer=dict(
318
+ betas=(
319
+ 0.9,
320
+ 0.999,
321
+ ),
322
+ eps=1e-08,
323
+ lr=0.0001,
324
+ type='AdamW',
325
+ weight_decay=0.05),
326
+ paramwise_cfg=dict(
327
+ custom_keys=dict({
328
+ 'absolute_pos_embed':
329
+ dict(decay_mult=0.0, lr_mult=0.1),
330
+ 'backbone':
331
+ dict(decay_mult=1.0, lr_mult=0.1),
332
+ 'backbone.norm':
333
+ dict(decay_mult=0.0, lr_mult=0.1),
334
+ 'backbone.patch_embed.norm':
335
+ dict(decay_mult=0.0, lr_mult=0.1),
336
+ 'backbone.stages.0.blocks.0.norm':
337
+ dict(decay_mult=0.0, lr_mult=0.1),
338
+ 'backbone.stages.0.blocks.1.norm':
339
+ dict(decay_mult=0.0, lr_mult=0.1),
340
+ 'backbone.stages.0.downsample.norm':
341
+ dict(decay_mult=0.0, lr_mult=0.1),
342
+ 'backbone.stages.1.blocks.0.norm':
343
+ dict(decay_mult=0.0, lr_mult=0.1),
344
+ 'backbone.stages.1.blocks.1.norm':
345
+ dict(decay_mult=0.0, lr_mult=0.1),
346
+ 'backbone.stages.1.downsample.norm':
347
+ dict(decay_mult=0.0, lr_mult=0.1),
348
+ 'backbone.stages.2.blocks.0.norm':
349
+ dict(decay_mult=0.0, lr_mult=0.1),
350
+ 'backbone.stages.2.blocks.1.norm':
351
+ dict(decay_mult=0.0, lr_mult=0.1),
352
+ 'backbone.stages.2.blocks.10.norm':
353
+ dict(decay_mult=0.0, lr_mult=0.1),
354
+ 'backbone.stages.2.blocks.11.norm':
355
+ dict(decay_mult=0.0, lr_mult=0.1),
356
+ 'backbone.stages.2.blocks.12.norm':
357
+ dict(decay_mult=0.0, lr_mult=0.1),
358
+ 'backbone.stages.2.blocks.13.norm':
359
+ dict(decay_mult=0.0, lr_mult=0.1),
360
+ 'backbone.stages.2.blocks.14.norm':
361
+ dict(decay_mult=0.0, lr_mult=0.1),
362
+ 'backbone.stages.2.blocks.15.norm':
363
+ dict(decay_mult=0.0, lr_mult=0.1),
364
+ 'backbone.stages.2.blocks.16.norm':
365
+ dict(decay_mult=0.0, lr_mult=0.1),
366
+ 'backbone.stages.2.blocks.17.norm':
367
+ dict(decay_mult=0.0, lr_mult=0.1),
368
+ 'backbone.stages.2.blocks.2.norm':
369
+ dict(decay_mult=0.0, lr_mult=0.1),
370
+ 'backbone.stages.2.blocks.3.norm':
371
+ dict(decay_mult=0.0, lr_mult=0.1),
372
+ 'backbone.stages.2.blocks.4.norm':
373
+ dict(decay_mult=0.0, lr_mult=0.1),
374
+ 'backbone.stages.2.blocks.5.norm':
375
+ dict(decay_mult=0.0, lr_mult=0.1),
376
+ 'backbone.stages.2.blocks.6.norm':
377
+ dict(decay_mult=0.0, lr_mult=0.1),
378
+ 'backbone.stages.2.blocks.7.norm':
379
+ dict(decay_mult=0.0, lr_mult=0.1),
380
+ 'backbone.stages.2.blocks.8.norm':
381
+ dict(decay_mult=0.0, lr_mult=0.1),
382
+ 'backbone.stages.2.blocks.9.norm':
383
+ dict(decay_mult=0.0, lr_mult=0.1),
384
+ 'backbone.stages.2.downsample.norm':
385
+ dict(decay_mult=0.0, lr_mult=0.1),
386
+ 'backbone.stages.3.blocks.0.norm':
387
+ dict(decay_mult=0.0, lr_mult=0.1),
388
+ 'backbone.stages.3.blocks.1.norm':
389
+ dict(decay_mult=0.0, lr_mult=0.1),
390
+ 'level_embed':
391
+ dict(decay_mult=0.0, lr_mult=1.0),
392
+ 'query_embed':
393
+ dict(decay_mult=0.0, lr_mult=1.0),
394
+ 'query_feat':
395
+ dict(decay_mult=0.0, lr_mult=1.0),
396
+ 'relative_position_bias_table':
397
+ dict(decay_mult=0.0, lr_mult=0.1)
398
+ }),
399
+ norm_decay_mult=0.0),
400
+ type='OptimWrapper')
401
+ optimizer = dict(
402
+ betas=(
403
+ 0.9,
404
+ 0.999,
405
+ ),
406
+ eps=1e-08,
407
+ lr=0.0001,
408
+ type='AdamW',
409
+ weight_decay=0.05)
410
# Learning-rate schedule: a single iteration-based polynomial decay
# (power=0.9) down to eta_min=0.
#
# `end` is set to 20000 so the decay finishes at train_cfg.max_iters
# (20000 in this config). It was previously 160000, which meant the
# schedule would have covered only the first eighth of its range by the
# time training stopped.
param_scheduler = [
    dict(
        begin=0,
        by_epoch=False,
        end=20000,
        eta_min=0,
        power=0.9,
        type='PolyLR'),
]
419
+ pretrained = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/swin/swin_large_patch4_window12_384_22k_20220412-6580f57d.pth'
420
+ randomness = dict(seed=0)
421
+ resume = False
422
+ test_cfg = dict(type='TestLoop')
423
+ test_dataloader = dict(
424
+ batch_size=1,
425
+ dataset=dict(
426
+ data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
427
+ data_root='PanicleDataset/',
428
+ pipeline=[
429
+ dict(type='LoadImageFromFile'),
430
+ dict(keep_ratio=True, scale=(
431
+ 2048,
432
+ 1024,
433
+ ), type='Resize'),
434
+ dict(type='LoadAnnotations'),
435
+ dict(type='PackSegInputs'),
436
+ ],
437
+ type='TzyDataset'),
438
+ num_workers=4,
439
+ persistent_workers=True,
440
+ sampler=dict(shuffle=False, type='DefaultSampler'))
441
+ test_evaluator = dict(
442
+ iou_metrics=[
443
+ 'mIoU',
444
+ 'mDice',
445
+ 'mFscore',
446
+ ], type='IoUMetric')
447
+ test_pipeline = [
448
+ dict(type='LoadImageFromFile'),
449
+ dict(keep_ratio=True, scale=(
450
+ 2048,
451
+ 1024,
452
+ ), type='Resize'),
453
+ dict(type='LoadAnnotations'),
454
+ dict(type='PackSegInputs'),
455
+ ]
456
+ train_cfg = dict(max_iters=20000, type='IterBasedTrainLoop', val_interval=500)
457
+ train_dataloader = dict(
458
+ batch_size=2,
459
+ dataset=dict(
460
+ data_prefix=dict(
461
+ img_path='img_dir/train', seg_map_path='ann_dir/train'),
462
+ data_root='PanicleDataset/',
463
+ pipeline=[
464
+ dict(type='LoadImageFromFile'),
465
+ dict(type='LoadAnnotations'),
466
+ dict(
467
+ keep_ratio=True,
468
+ ratio_range=(
469
+ 0.5,
470
+ 2.0,
471
+ ),
472
+ scale=(
473
+ 2048,
474
+ 1024,
475
+ ),
476
+ type='RandomResize'),
477
+ dict(
478
+ cat_max_ratio=0.75, crop_size=(
479
+ 512,
480
+ 512,
481
+ ), type='RandomCrop'),
482
+ dict(prob=0.5, type='RandomFlip'),
483
+ dict(type='PhotoMetricDistortion'),
484
+ dict(type='PackSegInputs'),
485
+ ],
486
+ type='TzyDataset'),
487
+ num_workers=2,
488
+ persistent_workers=True,
489
+ sampler=dict(shuffle=True, type='InfiniteSampler'))
490
+ train_pipeline = [
491
+ dict(type='LoadImageFromFile'),
492
+ dict(type='LoadAnnotations'),
493
+ dict(
494
+ keep_ratio=True,
495
+ ratio_range=(
496
+ 0.5,
497
+ 2.0,
498
+ ),
499
+ scale=(
500
+ 2048,
501
+ 1024,
502
+ ),
503
+ type='RandomResize'),
504
+ dict(cat_max_ratio=0.75, crop_size=(
505
+ 512,
506
+ 512,
507
+ ), type='RandomCrop'),
508
+ dict(prob=0.5, type='RandomFlip'),
509
+ dict(type='PhotoMetricDistortion'),
510
+ dict(type='PackSegInputs'),
511
+ ]
512
+ tta_model = dict(type='SegTTAModel')
513
+ tta_pipeline = [
514
+ dict(file_client_args=dict(backend='disk'), type='LoadImageFromFile'),
515
+ dict(
516
+ transforms=[
517
+ [
518
+ dict(keep_ratio=True, scale_factor=0.5, type='Resize'),
519
+ dict(keep_ratio=True, scale_factor=0.75, type='Resize'),
520
+ dict(keep_ratio=True, scale_factor=1.0, type='Resize'),
521
+ dict(keep_ratio=True, scale_factor=1.25, type='Resize'),
522
+ dict(keep_ratio=True, scale_factor=1.5, type='Resize'),
523
+ dict(keep_ratio=True, scale_factor=1.75, type='Resize'),
524
+ ],
525
+ [
526
+ dict(direction='horizontal', prob=0.0, type='RandomFlip'),
527
+ dict(direction='horizontal', prob=1.0, type='RandomFlip'),
528
+ ],
529
+ [
530
+ dict(type='LoadAnnotations'),
531
+ ],
532
+ [
533
+ dict(type='PackSegInputs'),
534
+ ],
535
+ ],
536
+ type='TestTimeAug'),
537
+ ]
538
+ val_cfg = dict(type='ValLoop')
539
+ val_dataloader = dict(
540
+ batch_size=1,
541
+ dataset=dict(
542
+ data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
543
+ data_root='PanicleDataset/',
544
+ pipeline=[
545
+ dict(type='LoadImageFromFile'),
546
+ dict(keep_ratio=True, scale=(
547
+ 2048,
548
+ 1024,
549
+ ), type='Resize'),
550
+ dict(type='LoadAnnotations'),
551
+ dict(type='PackSegInputs'),
552
+ ],
553
+ type='TzyDataset'),
554
+ num_workers=4,
555
+ persistent_workers=True,
556
+ sampler=dict(shuffle=False, type='DefaultSampler'))
557
+ val_evaluator = dict(
558
+ iou_metrics=[
559
+ 'mIoU',
560
+ 'mDice',
561
+ 'mFscore',
562
+ ], type='IoUMetric')
563
+ vis_backends = [
564
+ dict(type='LocalVisBackend'),
565
+ ]
566
+ visualizer = dict(
567
+ name='visualizer',
568
+ type='SegLocalVisualizer',
569
+ vis_backends=[
570
+ dict(type='LocalVisBackend'),
571
+ ])
572
+ work_dir = './work_dirs/TzyDataset-Mask2Former-0721'
model_configs/CVRP_Segformer.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint = 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth'
2
+ crop_size = (
3
+ 512,
4
+ 512,
5
+ )
6
+ data_preprocessor = dict(
7
+ bgr_to_rgb=True,
8
+ mean=[
9
+ 123.675,
10
+ 116.28,
11
+ 103.53,
12
+ ],
13
+ pad_val=0,
14
+ seg_pad_val=255,
15
+ size=(
16
+ 512,
17
+ 512,
18
+ ),
19
+ std=[
20
+ 58.395,
21
+ 57.12,
22
+ 57.375,
23
+ ],
24
+ type='SegDataPreProcessor')
25
+ data_root = 'PanicleDataset/'
26
+ dataset_type = 'TzyDataset'
27
+ default_hooks = dict(
28
+ checkpoint=dict(
29
+ by_epoch=False,
30
+ interval=2500,
31
+ max_keep_ckpts=1,
32
+ save_best='mIoU',
33
+ type='CheckpointHook'),
34
+ logger=dict(interval=100, log_metric_by_epoch=False, type='LoggerHook'),
35
+ param_scheduler=dict(type='ParamSchedulerHook'),
36
+ sampler_seed=dict(type='DistSamplerSeedHook'),
37
+ timer=dict(type='IterTimerHook'),
38
+ visualization=dict(type='SegVisualizationHook'))
39
+ default_scope = 'mmseg'
40
+ env_cfg = dict(
41
+ cudnn_benchmark=True,
42
+ dist_cfg=dict(backend='nccl'),
43
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
44
+ img_ratios = [
45
+ 0.5,
46
+ 0.75,
47
+ 1.0,
48
+ 1.25,
49
+ 1.5,
50
+ 1.75,
51
+ ]
52
+ load_from = None
53
+ log_level = 'INFO'
54
+ log_processor = dict(by_epoch=False)
55
+ model = dict(
56
+ backbone=dict(
57
+ attn_drop_rate=0.0,
58
+ drop_path_rate=0.1,
59
+ drop_rate=0.0,
60
+ embed_dims=64,
61
+ in_channels=3,
62
+ init_cfg=dict(
63
+ checkpoint=
64
+ 'https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segformer/mit_b5_20220624-658746d9.pth',
65
+ type='Pretrained'),
66
+ mlp_ratio=4,
67
+ num_heads=[
68
+ 1,
69
+ 2,
70
+ 5,
71
+ 8,
72
+ ],
73
+ num_layers=[
74
+ 3,
75
+ 6,
76
+ 40,
77
+ 3,
78
+ ],
79
+ num_stages=4,
80
+ out_indices=(
81
+ 0,
82
+ 1,
83
+ 2,
84
+ 3,
85
+ ),
86
+ patch_sizes=[
87
+ 7,
88
+ 3,
89
+ 3,
90
+ 3,
91
+ ],
92
+ qkv_bias=True,
93
+ sr_ratios=[
94
+ 8,
95
+ 4,
96
+ 2,
97
+ 1,
98
+ ],
99
+ type='MixVisionTransformer'),
100
+ data_preprocessor=dict(
101
+ bgr_to_rgb=True,
102
+ mean=[
103
+ 123.675,
104
+ 116.28,
105
+ 103.53,
106
+ ],
107
+ pad_val=0,
108
+ seg_pad_val=255,
109
+ size=(
110
+ 512,
111
+ 512,
112
+ ),
113
+ std=[
114
+ 58.395,
115
+ 57.12,
116
+ 57.375,
117
+ ],
118
+ type='SegDataPreProcessor'),
119
+ decode_head=dict(
120
+ align_corners=False,
121
+ channels=256,
122
+ dropout_ratio=0.1,
123
+ in_channels=[
124
+ 64,
125
+ 128,
126
+ 320,
127
+ 512,
128
+ ],
129
+ in_index=[
130
+ 0,
131
+ 1,
132
+ 2,
133
+ 3,
134
+ ],
135
+ loss_decode=dict(
136
+ loss_weight=1.0, type='CrossEntropyLoss', use_sigmoid=False),
137
+ norm_cfg=dict(requires_grad=True, type='BN'),
138
+ num_classes=2,
139
+ type='SegformerHead'),
140
+ pretrained=None,
141
+ test_cfg=dict(mode='whole'),
142
+ train_cfg=dict(),
143
+ type='EncoderDecoder')
144
+ norm_cfg = dict(requires_grad=True, type='BN')
145
+ optim_wrapper = dict(
146
+ optimizer=dict(
147
+ betas=(
148
+ 0.9,
149
+ 0.999,
150
+ ), lr=6e-05, type='AdamW', weight_decay=0.01),
151
+ paramwise_cfg=dict(
152
+ custom_keys=dict(
153
+ head=dict(lr_mult=10.0),
154
+ norm=dict(decay_mult=0.0),
155
+ pos_block=dict(decay_mult=0.0))),
156
+ type='OptimWrapper')
157
+ optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005)
158
+ param_scheduler = [
159
+ dict(
160
+ begin=0, by_epoch=False, end=1500, start_factor=1e-06,
161
+ type='LinearLR'),
162
+ dict(
163
+ begin=1500,
164
+ by_epoch=False,
165
+ end=160000,
166
+ eta_min=0.0,
167
+ power=1.0,
168
+ type='PolyLR'),
169
+ ]
170
+ randomness = dict(seed=0)
171
+ resume = False
172
+ test_cfg = dict(type='TestLoop')
173
+ test_dataloader = dict(
174
+ batch_size=1,
175
+ dataset=dict(
176
+ data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
177
+ data_root='PanicleDataset/',
178
+ pipeline=[
179
+ dict(type='LoadImageFromFile'),
180
+ dict(keep_ratio=True, scale=(
181
+ 2048,
182
+ 1024,
183
+ ), type='Resize'),
184
+ dict(type='LoadAnnotations'),
185
+ dict(type='PackSegInputs'),
186
+ ],
187
+ type='TzyDataset'),
188
+ num_workers=4,
189
+ persistent_workers=True,
190
+ sampler=dict(shuffle=False, type='DefaultSampler'))
191
+ test_evaluator = dict(
192
+ iou_metrics=[
193
+ 'mIoU',
194
+ 'mDice',
195
+ 'mFscore',
196
+ ], type='IoUMetric')
197
+ test_pipeline = [
198
+ dict(type='LoadImageFromFile'),
199
+ dict(keep_ratio=True, scale=(
200
+ 2048,
201
+ 1024,
202
+ ), type='Resize'),
203
+ dict(type='LoadAnnotations'),
204
+ dict(type='PackSegInputs'),
205
+ ]
206
+ train_cfg = dict(max_iters=20000, type='IterBasedTrainLoop', val_interval=500)
207
+ train_dataloader = dict(
208
+ batch_size=2,
209
+ dataset=dict(
210
+ data_prefix=dict(
211
+ img_path='img_dir/train', seg_map_path='ann_dir/train'),
212
+ data_root='PanicleDataset/',
213
+ pipeline=[
214
+ dict(type='LoadImageFromFile'),
215
+ dict(type='LoadAnnotations'),
216
+ dict(
217
+ keep_ratio=True,
218
+ ratio_range=(
219
+ 0.5,
220
+ 2.0,
221
+ ),
222
+ scale=(
223
+ 2048,
224
+ 1024,
225
+ ),
226
+ type='RandomResize'),
227
+ dict(
228
+ cat_max_ratio=0.75, crop_size=(
229
+ 512,
230
+ 512,
231
+ ), type='RandomCrop'),
232
+ dict(prob=0.5, type='RandomFlip'),
233
+ dict(type='PhotoMetricDistortion'),
234
+ dict(type='PackSegInputs'),
235
+ ],
236
+ type='TzyDataset'),
237
+ num_workers=2,
238
+ persistent_workers=True,
239
+ sampler=dict(shuffle=True, type='InfiniteSampler'))
240
+ train_pipeline = [
241
+ dict(type='LoadImageFromFile'),
242
+ dict(type='LoadAnnotations'),
243
+ dict(
244
+ keep_ratio=True,
245
+ ratio_range=(
246
+ 0.5,
247
+ 2.0,
248
+ ),
249
+ scale=(
250
+ 2048,
251
+ 1024,
252
+ ),
253
+ type='RandomResize'),
254
+ dict(cat_max_ratio=0.75, crop_size=(
255
+ 512,
256
+ 512,
257
+ ), type='RandomCrop'),
258
+ dict(prob=0.5, type='RandomFlip'),
259
+ dict(type='PhotoMetricDistortion'),
260
+ dict(type='PackSegInputs'),
261
+ ]
262
+ tta_model = dict(type='SegTTAModel')
263
+ tta_pipeline = [
264
+ dict(file_client_args=dict(backend='disk'), type='LoadImageFromFile'),
265
+ dict(
266
+ transforms=[
267
+ [
268
+ dict(keep_ratio=True, scale_factor=0.5, type='Resize'),
269
+ dict(keep_ratio=True, scale_factor=0.75, type='Resize'),
270
+ dict(keep_ratio=True, scale_factor=1.0, type='Resize'),
271
+ dict(keep_ratio=True, scale_factor=1.25, type='Resize'),
272
+ dict(keep_ratio=True, scale_factor=1.5, type='Resize'),
273
+ dict(keep_ratio=True, scale_factor=1.75, type='Resize'),
274
+ ],
275
+ [
276
+ dict(direction='horizontal', prob=0.0, type='RandomFlip'),
277
+ dict(direction='horizontal', prob=1.0, type='RandomFlip'),
278
+ ],
279
+ [
280
+ dict(type='LoadAnnotations'),
281
+ ],
282
+ [
283
+ dict(type='PackSegInputs'),
284
+ ],
285
+ ],
286
+ type='TestTimeAug'),
287
+ ]
288
+ val_cfg = dict(type='ValLoop')
289
+ val_dataloader = dict(
290
+ batch_size=1,
291
+ dataset=dict(
292
+ data_prefix=dict(img_path='img_dir/val', seg_map_path='ann_dir/val'),
293
+ data_root='PanicleDataset/',
294
+ pipeline=[
295
+ dict(type='LoadImageFromFile'),
296
+ dict(keep_ratio=True, scale=(
297
+ 2048,
298
+ 1024,
299
+ ), type='Resize'),
300
+ dict(type='LoadAnnotations'),
301
+ dict(type='PackSegInputs'),
302
+ ],
303
+ type='TzyDataset'),
304
+ num_workers=4,
305
+ persistent_workers=True,
306
+ sampler=dict(shuffle=False, type='DefaultSampler'))
307
+ val_evaluator = dict(
308
+ iou_metrics=[
309
+ 'mIoU',
310
+ 'mDice',
311
+ 'mFscore',
312
+ ], type='IoUMetric')
313
+ vis_backends = [
314
+ dict(type='LocalVisBackend'),
315
+ ]
316
+ visualizer = dict(
317
+ name='visualizer',
318
+ type='SegLocalVisualizer',
319
+ vis_backends=[
320
+ dict(type='LocalVisBackend'),
321
+ ])
322
+ work_dir = './work_dirs/TzyDataset-Segformer-0721'