diff --git "a/ndarray-cache-b16.json" "b/ndarray-cache-b16.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache-b16.json" @@ -0,0 +1,8514 @@ +{ + "metadata": { + "ParamSize": 724, + "ParamBytes": 3616698368.0, + "BitsPerParam": 7.147411884369364 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 49250304, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32064, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 49250304, + "byteOffset": 0 + } + ], + "md5sum": "a2730de6700a8da8f261d118b62f7c2f" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.18.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2d87f603cbd40cc1e02107d1c79ac5d5" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 23470080, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32064, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6156288, + "byteOffset": 0 + }, + { + "name": "model.h.18.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 6156288 + }, + { + "name": "model.h.18.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 6162432 + }, + { + "name": "model.h.18.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 18745344 + }, + { + "name": "model.h.18.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 20318208 + }, + { + "name": "model.h.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 23463936 + } + ], + "md5sum": "c0176ceda7639d2f93622aecdb57fd72" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.19.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0932f259717d20831b3e4b840854078b" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.18.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.18.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.19.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.19.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.19.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.19.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "35ace1579e08797658ea555f65591325" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.19.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.19.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.19.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.19.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.20.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "dfaf39fc98e1fd3e5fad7f2614dcf10b" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.20.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "184b3ecb41f2ed3bb57167a9c54cdea1" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.20.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.20.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.20.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.20.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.20.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "ded4529eceb941a62efd5a32055c40fd" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.21.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ce3bafd5a67f525021d8a32f30f3ee23" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.20.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.20.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.21.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.21.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.21.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.21.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "459ba8a6ddfb193f3e79c12fc7f1deb4" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.21.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.21.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.21.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.21.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.22.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "3042e78776d44704c65ab5323de2ec63" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.22.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7a7a195a4ecf23cc2422cf9ef7c2f579" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.22.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.22.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.22.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.22.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.22.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "e4f69336435b2967ea7a3d362efb830f" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.23.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a146111ecad96618333340c4e0ad6cb6" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.22.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.22.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.23.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.23.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.23.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.23.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "f0253df1203b62b0ea34c971e21e7868" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.23.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.23.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.23.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.23.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.24.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "5345493ecb8246c25ec64722798b713e" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.24.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "bcee40ddb87bb0ecf2cc95891dd4d55a" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.24.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.24.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.24.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.24.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.24.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "d47f29d6588fdbc236db4e9a3cc8c847" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.25.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c621d09ed1d897a45b7108f5bb02d5bd" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.24.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.24.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.25.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.25.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.25.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.25.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "3d4cee43c30013960013451d2630819e" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.25.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.25.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.25.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.25.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.26.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "bd193921647ae72d91094a30329611ad" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.26.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "808339d416cb56985f4adfec7a0a6509" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.26.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.26.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.26.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.26.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.26.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "61b2aa6694b1361f7c670d1692e16168" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.27.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c6b006f80c310f314db7b6ebabfe5b23" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.26.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.26.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.27.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.27.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.27.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.27.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "78d21de072db62ce75903a9d77761383" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.27.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.27.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.27.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.27.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.28.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "2d377c0a2b288b3e64b2226f24bc4baa" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.28.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6f4702d5229c84b5f5f020b2236daec4" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.28.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.28.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.28.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.28.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.28.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.28.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "ecf846c8c16d60521f8d6c05be38dceb" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.29.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "872c8b2d35317e8b9887bce5e1e2e509" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.28.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.28.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.29.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.29.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.29.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.29.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.29.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "1190b419bed563a81feda602c062fb9c" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.29.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.29.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.29.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.29.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.30.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "add1825fb7e3ac0441669ac726c095f4" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.30.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9dcba5947f6e4d834dbbfbe48fcd6a88" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.30.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.30.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.30.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.30.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.30.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.30.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "3bbd98737ceab5a2d91da88254e99e7f" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.31.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "115d3216c992ec286bb0b7ebc6764d75" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.30.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.30.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.31.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.31.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.31.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.31.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.31.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "47a4f2967e2fbc44edad8d664fc458fe" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 49250304, + "records": [ + { + "name": "model.embd.q_weight", + "shape": [ + 32064, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 49250304, + "byteOffset": 0 + } + ], + "md5sum": "9473898a683287b15fa20c80820a1a80" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 27402240, + "records": [ + { + "name": "model.h.31.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.31.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.31.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.31.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.norm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + }, + { + "name": "model.embd.q_scale", + "shape": [ + 32064, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6156288, + "byteOffset": 21239808 + }, + { + "name": "model.h.0.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 27396096 + } + ], + "md5sum": "55b05f12c534a87b3a22c6bbf6fa2329" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.0.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6ff758d383d7bfc1b57e6bef512334f6" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.0.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.0.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.0.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.0.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.0.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "2fd0c7547e944b5dccf9e130e14d994e" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.1.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ab1ab2eefb8d5f933d8b7dd1b24471a6" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.0.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.0.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.1.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.1.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.1.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.1.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "1d2f8e4ffe9a989838f545ef6e12d6fd" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.1.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.1.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.1.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.1.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.10.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "b74341c39bd3ef26350ca53836f8c791" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.10.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6b84e05237d31409763ee66129e56c38" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.10.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.10.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.10.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.10.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.10.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "5a823ca7de0f4e52b85080a9e2858ec0" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.11.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4571c5859b9c1b57bedebb97d538bbc2" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.10.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.10.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.11.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.11.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.11.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.11.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "6938afd56e1d315d2e18fd298ea24cd3" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.11.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.11.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.11.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.11.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.12.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "0ababb0faa05ac4b075cec0a74238708" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.12.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a952d9d81b1d89d2dd8257012255f5d1" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.12.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.12.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.12.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.12.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.12.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "813f3c9422c6069eb65133c77f89abce" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.13.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "728ccd85eb8b4760b119e4a47027a67c" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.12.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.12.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.13.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.13.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.13.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.13.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "784a6ab4d0ea102054bff4b0f9ceb3d1" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.13.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.13.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.13.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.13.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.14.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "1062ae0b66c07a9eca7ee42a96accba8" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.14.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5048cfa14492fdc8defae0228829ecae" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.14.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.14.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.14.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.14.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.14.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "f8726cd482de23808b02dbc9a6380d26" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.15.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f41f72ca893928743b0819556e7bb3c8" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.14.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.14.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.15.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.15.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.15.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.15.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "c483c46c548edc8f5b178fad748087dd" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.15.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.15.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.15.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.15.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.16.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "8f46057779e5691a9c3fe4888bb9de92" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.16.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d4934aa10009bdabe96f10853da997ab" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.16.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.16.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.16.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.16.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.16.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "21e5639fda8cb757002d75277ae09404" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.17.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e7788fb5830a85dd6c5cb83c524de0d9" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.16.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.16.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.17.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.17.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.17.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.17.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "28e9a770d395e47416263647d6fe8b91" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 26548224, + "records": [ + { + "name": "model.h.17.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.17.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.17.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.17.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.18.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21233664 + }, + { + "name": "model.h.18.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 25952256 + }, + { + "name": "model.h.2.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 26542080 + } + ], + "md5sum": "90c950139fd5235a6f98bc9a2fa1ac34" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.2.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "26e3057f7eaa708047e619e41481aa39" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.2.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.2.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.2.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.2.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.2.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "3f1ac1713af531ebc445ca98eee8da9b" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.3.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0a38a45bb17da5cee40180f2f75c53ba" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.2.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.2.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.3.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.3.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.3.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.3.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "0b9fc2da0688d4960726c06acbe42037" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.3.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.3.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.3.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.3.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.4.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "c142df48619f29c8bf3c525a1b69f607" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.4.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a1937eca44a09ca8d5d119cac0710e45" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.4.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.4.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.4.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.4.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.4.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "1141250328b11b976141c4d4423998fe" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.5.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ef0eef50809ddac84348f609da645d19" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.4.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.4.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.5.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.5.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.5.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.5.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "826989af93cbf1aaab6875afe5e9991d" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.5.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.5.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.5.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.5.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.6.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "99266d8e478da4e71804b179fcb6d145" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.6.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e59b9d24aa1e4aef4a1cdc8f4e9a44db" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.6.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.6.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.6.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.6.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.6.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "0cdd63a4a4bb32b016c0eb6f1a49c63b" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.7.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c675ac04803549857bef472c165e0a77" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.6.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.6.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.7.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.7.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.7.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.7.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "5efd93ae10b1c6f4b17c9a6b1e7cdb19" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.7.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.7.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.7.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.7.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.8.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "321761153751363bd26c6f8f11e88a0c" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.8.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "80ad2d45d4c3c63ce24e81645d034b74" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.8.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.8.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.8.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.8.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.8.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "9afcb2ca0c91eb76fbda757309b88b58" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.9.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "77bdf7012687732e5175e9d4b67b7b66" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.8.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.8.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.9.ln.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.9.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.9.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.9.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "bb69d374b22d4a7437d081eac0def25c" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 32036864, + "records": [ + { + "name": "model.h.9.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.9.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.9.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.9.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "vision_embed_tokens.glb_GN", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.embeddings.class_embedding", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 21241856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.embeddings.patch_embedding.weight", + "shape": [ + 1024, + 3, + 14, + 14 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1204224, + "byteOffset": 21243904 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.embeddings.position_embedding.weight", + "shape": [ + 577, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1181696, + "byteOffset": 22448128 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 23629824 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 23631872 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 23633920 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 23635968 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23638016 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 23646208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 32034816 + } + ], + "md5sum": "a7d9ae4deaea6c486bff82f9bde35623" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "c8b538e60f975970e8d0114577dee382" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "683e2f92ba10f7a9705f52c6cb51368e" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "adfafbe7d6959d9569c06ceadc8d7c34" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "24d41ecf557c5f984d40d215effcb6c9" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "73750663aec89f8ee4fa3d34c3795150" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "a8fdf964a0bf7c0f2fd2e43b58cf8882" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "6bedb2090ba553bf7ce53c5b11227500" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "7ba91f13cad2b1eb14419a63379f185d" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "b00d568dfa33f1e269e838b50ae68cec" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "9f6fa0f980598d760dc7579e92031891" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "7c807048ef28f5cb8ec6ba0ffff9a5e6" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "0d349d70e68ac4565a701add22a3f848" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "b035699cefdbe31b0aec42eb8e1b8031" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "55ca8afceb79e197e4f486c30fa99d2f" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "5a06a1571b9cacbc8316c5b4cd99abf1" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "344a5b0799be7fedcb55796de58bff4e" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "d98c360eec434f7dcda0392eaae948d5" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "bffa470a26c1841e2812df3f7309e1e8" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "83b41f7765f019cc8a8166b401989905" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "49f40ce6f5f45f4d3717724aab652667" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "5cd95026a2d7f8360f1d3bf2e2f2ff7f" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "04877e8c02cbd383419f46beaebe7d6a" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "7cf1ead00f736b6144ae39774c21f3f2" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 29200384, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.post_layernorm.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.post_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.pre_layrnorm.bias", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.pre_layrnorm.weight", + "shape": [ + 1024 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_projection.linear_1.bias", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_projection.linear_1.q_weight", + "shape": [ + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16799744 + }, + { + "name": "vision_embed_tokens.img_projection.linear_1.q_scale", + "shape": [ + 3072, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 786432, + "byteOffset": 23091200 + }, + { + "name": "vision_embed_tokens.img_projection.linear_2.bias", + "shape": [ + 3072 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 6144, + "byteOffset": 23877632 + }, + { + "name": "vision_embed_tokens.img_projection.linear_2.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 23883776 + }, + { + "name": "vision_embed_tokens.img_projection.linear_2.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 589824, + "byteOffset": 28602368 + }, + { + "name": "vision_embed_tokens.sub_GN", + "shape": [ + 1, + 1, + 1, + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29192192 + } + ], + "md5sum": "205528c07e65479e4ca999084a25016b" + } + ] +} \ No newline at end of file