# Crop size as a 2-tuple; both entries are 512.
# NOTE(review): the (height, width) ordering is presumed from the usual
# convention - confirm against whichever pipeline consumes this value.
crop_size = (512, 512)
# Model config: an `EncoderDecoder` whose backbone is a
# `CloudAdapterDinoVisionTransformer` and whose decode head is a
# `Mask2FormerHead`. Components are selected by their `type` strings; the
# remaining keys are passed to those components as keyword arguments.
model = dict(
    backbone=dict(
        # Indices 0..23 inclusive (24 entries, same count as `depth` below).
        adapter_index=list(range(24)),
        block_chunks=0,
        depth=24,
        embed_dim=1024,
        ffn_bias=True,
        ffn_layer='mlp',
        has_cat=False,
        img_size=512,
        init_values=1e-05,
        mlp_ratio=4,
        num_heads=16,
        cloud_adapter_config=dict(
            cnn_type='pmaa',
            context_dim=64,
            depth=4,
            # NOTE(review): spelled `emd_dim`, not `embed_dim`. Kept verbatim
            # because it must match the CloudAdapter constructor, which is
            # not visible here - confirm before renaming.
            emd_dim=1024,
            global_groups=1,
            hidden_channels=64,
            int_type='convnext',
            local_groups=1,
            num_layers=24,
            rank_dim=16,
            return_last_feature=False,
            return_multi_feats=False,
            type='CloudAdapter',
        ),
        patch_size=16,
        proj_bias=True,
        qkv_bias=True,
        type='CloudAdapterDinoVisionTransformer',
    ),
    data_preprocessor=dict(
        bgr_to_rgb=True,
        # NOTE(review): presumably per-channel statistics on a 0-255 scale,
        # applied after the BGR->RGB swap - confirm.
        mean=[123.675, 116.28, 103.53],
        pad_val=0,
        seg_pad_val=255,
        size=(512, 512),
        std=[58.395, 57.12, 57.375],
        type='SegDataPreProcessor',
    ),
    decode_head=dict(
        align_corners=False,
        enforce_decoder_input_project=False,
        feat_channels=256,
        # One entry per stride listed in `strides` below (4 each).
        in_channels=[1024, 1024, 1024, 1024],
        loss_cls=dict(
            # Five weights for `num_classes=4`.
            # NOTE(review): the trailing 0.1 is presumably the weight of an
            # extra "no object" class - confirm against the head.
            class_weight=[1.0, 1.0, 1.0, 1.0, 0.1],
            loss_weight=2.0,
            reduction='mean',
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
        ),
        loss_dice=dict(
            activate=True,
            eps=1.0,
            loss_weight=5.0,
            naive_dice=True,
            reduction='mean',
            type='mmdet.DiceLoss',
            use_sigmoid=True,
        ),
        loss_mask=dict(
            loss_weight=5.0,
            reduction='mean',
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
        ),
        num_classes=4,
        num_queries=100,
        num_transformer_feat_level=3,
        out_channels=256,
        pixel_decoder=dict(
            act_cfg=dict(type='ReLU'),
            encoder=dict(
                init_cfg=None,
                layer_cfg=dict(
                    ffn_cfg=dict(
                        act_cfg=dict(inplace=True, type='ReLU'),
                        embed_dims=256,
                        feedforward_channels=1024,
                        ffn_drop=0.0,
                        num_fcs=2,
                    ),
                    self_attn_cfg=dict(
                        batch_first=True,
                        dropout=0.0,
                        embed_dims=256,
                        im2col_step=64,
                        init_cfg=None,
                        norm_cfg=None,
                        num_heads=8,
                        num_levels=3,
                        num_points=4,
                    ),
                ),
                num_layers=6,
            ),
            init_cfg=None,
            norm_cfg=dict(num_groups=32, type='GN'),
            num_outs=3,
            positional_encoding=dict(normalize=True, num_feats=128),
            type='mmdet.MSDeformAttnPixelDecoder',
        ),
        positional_encoding=dict(normalize=True, num_feats=128),
        strides=[4, 8, 16, 32],
        train_cfg=dict(
            assigner=dict(
                # The cost weights (2.0 / 5.0 / 5.0) equal the `loss_weight`
                # values of loss_cls / loss_mask / loss_dice above.
                match_costs=[
                    dict(type='mmdet.ClassificationCost', weight=2.0),
                    dict(
                        type='mmdet.CrossEntropyLossCost',
                        use_sigmoid=True,
                        weight=5.0,
                    ),
                    dict(
                        eps=1.0,
                        pred_act=True,
                        type='mmdet.DiceCost',
                        weight=5.0,
                    ),
                ],
                type='mmdet.HungarianAssigner',
            ),
            importance_sample_ratio=0.75,
            num_points=12544,
            oversample_ratio=3.0,
            sampler=dict(type='mmdet.MaskPseudoSampler'),
        ),
        transformer_decoder=dict(
            init_cfg=None,
            layer_cfg=dict(
                cross_attn_cfg=dict(
                    attn_drop=0.0,
                    batch_first=True,
                    dropout_layer=None,
                    embed_dims=256,
                    num_heads=8,
                    proj_drop=0.0,
                ),
                ffn_cfg=dict(
                    act_cfg=dict(inplace=True, type='ReLU'),
                    add_identity=True,
                    dropout_layer=None,
                    embed_dims=256,
                    feedforward_channels=2048,
                    ffn_drop=0.0,
                    num_fcs=2,
                ),
                self_attn_cfg=dict(
                    attn_drop=0.0,
                    batch_first=True,
                    dropout_layer=None,
                    embed_dims=256,
                    num_heads=8,
                    proj_drop=0.0,
                ),
            ),
            num_layers=9,
            return_intermediate=True,
        ),
        type='Mask2FormerHead',
    ),
    test_cfg=dict(mode='whole'),
    train_cfg=dict(),
    type='EncoderDecoder',
)