English
cloud-adapter-models / cloud-adapter-configs / multi_classes_512x512.py
XavierJiezou's picture
Add files using upload-large-folder tool
0467378 verified
raw
history blame
5.7 kB
# Two-element size tuple; the same (512, 512) value is repeated as the
# data preprocessor's `size` in the model config.
crop_size = (512, 512)
# Segmentor definition: an `EncoderDecoder` that pairs a
# `CloudAdapterDinoVisionTransformer` backbone with a `Mask2FormerHead`
# decode head. All `type` strings are registry names resolved by the
# framework that loads this config, not by this file.
model = dict(
    backbone=dict(
        # Indices 0..23, i.e. one index for each of the `depth=24` blocks.
        adapter_index=list(range(24)),
        block_chunks=0,
        depth=24,
        embed_dim=1024,
        ffn_bias=True,
        ffn_layer='mlp',
        has_cat=False,
        img_size=512,
        init_values=1e-05,
        mlp_ratio=4,
        num_heads=16,
        cloud_adapter_config=dict(
            cnn_type='pmaa',
            context_dim=64,
            depth=4,
            # NOTE(review): spelled `emd_dim`, not `embed_dim`. Kept verbatim
            # because it presumably has to match the CloudAdapter
            # constructor's parameter name -- confirm before renaming.
            emd_dim=1024,
            global_groups=1,
            hidden_channels=64,
            int_type='convnext',
            local_groups=1,
            # Same value as the backbone `depth` above.
            num_layers=24,
            rank_dim=16,
            return_last_feature=False,
            return_multi_feats=False,
            type='CloudAdapter',
        ),
        patch_size=16,
        proj_bias=True,
        qkv_bias=True,
        type='CloudAdapterDinoVisionTransformer',
    ),
    data_preprocessor=dict(
        bgr_to_rgb=True,
        # Per-channel normalisation statistics (these look like the usual
        # ImageNet mean/std on a 0-255 scale).
        mean=[123.675, 116.28, 103.53],
        pad_val=0,
        seg_pad_val=255,
        # Same value as the module-level `crop_size`, written out literally.
        size=(512, 512),
        std=[58.395, 57.12, 57.375],
        type='SegDataPreProcessor',
    ),
    decode_head=dict(
        align_corners=False,
        enforce_decoder_input_project=False,
        feat_channels=256,
        # Four input feature maps, each with 1024 channels (the backbone's
        # `embed_dim`); `strides` below has one entry per map.
        in_channels=[1024] * 4,
        loss_cls=dict(
            # `num_classes + 1` entries: four weights of 1.0 plus a trailing
            # 0.1 -- presumably the "no object" class, as is usual for
            # Mask2Former-style heads; verify against the head implementation.
            class_weight=[1.0, 1.0, 1.0, 1.0, 0.1],
            loss_weight=2.0,
            reduction='mean',
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
        ),
        loss_dice=dict(
            activate=True,
            eps=1.0,
            loss_weight=5.0,
            naive_dice=True,
            reduction='mean',
            type='mmdet.DiceLoss',
            use_sigmoid=True,
        ),
        loss_mask=dict(
            loss_weight=5.0,
            reduction='mean',
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
        ),
        num_classes=4,
        num_queries=100,
        num_transformer_feat_level=3,
        out_channels=256,
        pixel_decoder=dict(
            act_cfg=dict(type='ReLU'),
            encoder=dict(
                init_cfg=None,
                layer_cfg=dict(
                    ffn_cfg=dict(
                        act_cfg=dict(inplace=True, type='ReLU'),
                        embed_dims=256,
                        feedforward_channels=1024,
                        ffn_drop=0.0,
                        num_fcs=2,
                    ),
                    self_attn_cfg=dict(
                        batch_first=True,
                        dropout=0.0,
                        embed_dims=256,
                        im2col_step=64,
                        init_cfg=None,
                        norm_cfg=None,
                        num_heads=8,
                        # Matches `num_outs` and `num_transformer_feat_level`.
                        num_levels=3,
                        num_points=4,
                    ),
                ),
                num_layers=6,
            ),
            init_cfg=None,
            norm_cfg=dict(num_groups=32, type='GN'),
            num_outs=3,
            positional_encoding=dict(normalize=True, num_feats=128),
            type='mmdet.MSDeformAttnPixelDecoder',
        ),
        positional_encoding=dict(normalize=True, num_feats=128),
        strides=[4, 8, 16, 32],
        train_cfg=dict(
            assigner=dict(
                # Matching-cost weights (2.0 / 5.0 / 5.0) mirror the
                # `loss_cls` / `loss_mask` / `loss_dice` weights above.
                match_costs=[
                    dict(type='mmdet.ClassificationCost', weight=2.0),
                    dict(
                        type='mmdet.CrossEntropyLossCost',
                        use_sigmoid=True,
                        weight=5.0,
                    ),
                    dict(
                        eps=1.0,
                        pred_act=True,
                        type='mmdet.DiceCost',
                        weight=5.0,
                    ),
                ],
                type='mmdet.HungarianAssigner',
            ),
            importance_sample_ratio=0.75,
            num_points=12544,
            oversample_ratio=3.0,
            sampler=dict(type='mmdet.MaskPseudoSampler'),
        ),
        transformer_decoder=dict(
            init_cfg=None,
            layer_cfg=dict(
                cross_attn_cfg=dict(
                    attn_drop=0.0,
                    batch_first=True,
                    dropout_layer=None,
                    embed_dims=256,
                    num_heads=8,
                    proj_drop=0.0,
                ),
                ffn_cfg=dict(
                    act_cfg=dict(inplace=True, type='ReLU'),
                    add_identity=True,
                    dropout_layer=None,
                    embed_dims=256,
                    feedforward_channels=2048,
                    ffn_drop=0.0,
                    num_fcs=2,
                ),
                self_attn_cfg=dict(
                    attn_drop=0.0,
                    batch_first=True,
                    dropout_layer=None,
                    embed_dims=256,
                    num_heads=8,
                    proj_drop=0.0,
                ),
            ),
            num_layers=9,
            return_intermediate=True,
        ),
        type='Mask2FormerHead',
    ),
    test_cfg=dict(mode='whole'),
    # Intentionally empty dict at the segmentor level; the decode head carries
    # its own `train_cfg`.
    train_cfg=dict(),
    type='EncoderDecoder',
)