{
  "architectures": [
    "OmnivoreForJointClassification"
  ],
  "attention_dropout_rate": 0.0,
  "depth_mode": "summed_rgb_d_tokens",
  "depths": [
    2,
    2,
    18,
    2
  ],
  "drop_path_rate": 0.3,
  "dropout_rate": 0.0,
  "embed_dim": 96,
  "frozen_stages": -1,
  "head_dim_in": 768,
  "initializer_range": 0.02,
  "input_channels": 3,
  "mlp_ratio": 4.0,
  "model_type": "omnivore",
  "num_heads": [
    3,
    6,
    12,
    24
  ],
  "num_image_labels": 1000,
  "num_rgbd_labels": 19,
  "num_video_labels": 400,
  "patch_norm": true,
  "patch_size": [
    2,
    4,
    4
  ],
  "qk_scale": null,
  "qkv_bias": true,
  "torch_dtype": "float32",
  "transformers_version": "4.21.0.dev0",
  "window_size": [
    8,
    7,
    7
  ]
}