{ "architectures": [ "OmnivoreForJointClassification" ], "attention_dropout_rate": 0.0, "depth_mode": "summed_rgb_d_tokens", "depths": [ 2, 2, 18, 2 ], "drop_path_rate": 0.3, "dropout_rate": 0.0, "embed_dim": 96, "frozen_stages": -1, "head_dim_in": 768, "initializer_range": 0.02, "input_channels": 3, "mlp_ratio": 4.0, "model_type": "omnivore", "num_heads": [ 3, 6, 12, 24 ], "num_image_labels": 1000, "num_rgbd_labels": 19, "num_video_labels": 400, "patch_norm": true, "patch_size": [ 2, 4, 4 ], "qk_scale": null, "qkv_bias": true, "torch_dtype": "float32", "transformers_version": "4.21.0.dev0", "window_size": [ 8, 7, 7 ] }