{
  "data": {
    "image_shape": [420, 560],
    "shape_constraints": {
      "patch_size": 14,
      "pixels_bounds": [1800, 2400],
      "ratio_bounds": [0.66, 2.0]
    }
  },
  "eps": 1e-06,
  "generic": {
    "deterministic": true,
    "seed": 13
  },
  "model": {
    "expansion": 4,
    "name": "UniDepthV2",
    "num_heads": 8,
    "pixel_decoder": {
      "depths": [6, 0, 0],
      "dropout": 0.0,
      "hidden_dim": 512
    },
    "pixel_encoder": {
      "depths": [21, 22, 23, 24],
      "embed_dim": 1024,
      "embed_dims": [
        1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
        1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
        1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024
      ],
      "name": "dinov2_vitl14",
      "output_idx": [21, 22, 23, 24],
      "patch_size": 14,
      "pretrained": null,
      "stacking_fn": "last",
      "use_norm": true
    }
  },
  "training": {}
}