{ "metadata": { "ParamSize": 603, "ParamBytes": 2205403632.0, "BitsPerParam": 4.5026980373740555 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 49171968, "records": [ { "name": "language_model.lm_head.q_weight", "shape": [ 384, 32013 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 49171968, "byteOffset": 0 } ], "md5sum": "534a4503ce6adf828ca624be0f69ccf1" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.20.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dab9d14c49ae0837eb84136e09c2ec5e" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 23466432, "records": [ { "name": "language_model.lm_head.q_scale", "shape": [ 96, 32013 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6146496, "byteOffset": 0 }, { "name": "language_model.transformer.h.20.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 6146496 }, { "name": "language_model.transformer.h.20.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 6152640 }, { "name": "language_model.transformer.h.20.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 18735552 }, { "name": "language_model.transformer.h.20.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 20308416 }, { "name": "language_model.transformer.h.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23454144 }, { "name": "language_model.transformer.h.21.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23460288 } ], "md5sum": "819d230f80e5be477f566a917819fc4f" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.21.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4bee7a12045cd7cf00a3c85fdf70dc04" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.21.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.21.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.21.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.21.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.21.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "b17c192dfa8d5492a3a0b576a14f004e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.22.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7f85780bd641286afa467a54e9ac0562" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.21.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.21.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.22.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.22.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.22.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.22.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "97a4102dad83f6a72e495f41c78086f3" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.22.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.22.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.22.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.22.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.23.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "e564becb662cd29e109ae93c61d5acc7" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.23.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "38ccc0e20bbd23646f56748d4b22a5c9" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.23.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.23.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.23.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.23.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.23.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "3cbe6a8de5ab878f7a8fe8b9a82ae93c" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.24.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "303ed99ed5a5b427fc911c5297f5e489" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.23.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.23.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.24.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.24.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.24.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.24.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "46e8c585e3ba5bf880736fefa822dc4b" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.24.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.24.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.24.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.24.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.25.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "b263751fd961edd283e4510f535e4df0" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.25.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b1a92b7cce85c4cb2a0cb7d89a0a995c" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.25.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.25.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.25.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.25.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.25.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "004b5fce58aa9d1105d16f8cd13ad0a9" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.26.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "062fb7e94edb13465e5f9da1000c43c4" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.25.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.25.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.26.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.26.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.26.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.26.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "da2912c2275ec0c4030ed6a6c8eee125" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.26.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.26.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.26.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.26.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.27.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "6bbe0f5aa30c5dd9247b22fd7d877497" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.27.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "855d79bacb460a7cf3c774761d548e82" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.27.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.27.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.27.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.27.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.27.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "79ede332082eee0628a4577dbf25b6b8" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.28.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "37eea3fcd383d47adf108e18883330e7" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.27.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.27.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.28.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.28.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.28.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.28.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.28.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "744c9c923907aee0ff78a9ba29244924" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.28.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.28.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.28.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.28.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.29.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "dd41d25624fb3eb258959f40d16abfd9" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.29.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3afe21abf3902eef33a873c885b6117a" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.29.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.29.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.29.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.29.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.29.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.29.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "cf3d9d16b3bb0cd34f89ea1491715c61" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.30.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "934783d08078020aa8ae3960d4bbd447" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.29.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.29.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.30.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.30.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.30.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.30.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.30.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "fd9b73675d3f7ac7cd146d398f8f8b24" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.30.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.30.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.30.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.30.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.31.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "bc1b43f23113dd1567ae82762d4da05d" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.31.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "58a6b7a2150c2aa8047ebde03b92d29d" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.31.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.31.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.31.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.31.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.31.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.31.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "28b2b9865d5acaacc1940d4f30ab409e" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 49171968, "records": [ { "name": "language_model.transformer.embd.q_weight", "shape": [ 32013, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 49171968, "byteOffset": 0 } ], "md5sum": "d925e9965fa1cde7661a5a9582abef33" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 22084032, "records": [ { "name": "language_model.transformer.h.31.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.31.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.embd.q_scale", "shape": [ 32013, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6146496, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.0.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 22077888 } ], "md5sum": "3f8998ea9230b4cb7b8d524853f7e8ca" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.0.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c6a9012c326d32d1c2673c06ec16adb5" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.0.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.0.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.0.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.0.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.0.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "aa81ca8b40bbe8496cf2bb48bd2bd32a" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.1.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cbbc1ffdadaa0a9f4bcc08fc1c728983" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.0.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.0.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.1.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.1.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.1.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.1.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "dd886bdc6177a281f7c0441570b279ac" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.1.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.1.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.1.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.1.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.10.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "094e51bf6fbe5668071b9abba38ecc52" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.10.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c4ba449e5cd9fac5358f6ff7a1681aee" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.10.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.10.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.10.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.10.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.10.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "5ab82fbe4d49e155f1b18ffb71c5a25d" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.11.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ef17922c154364e5467274c30eaf9b15" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.10.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.10.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.11.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.11.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.11.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.11.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "2c7d32b0bcdfe097d5e48c312b75934a" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.11.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.11.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.11.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.11.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.12.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "c8f33f1b5eabbb55a2b6a6e465b85403" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.12.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3ced1594b9ac8b8c9155544658dab89b" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.12.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.12.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.12.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.12.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.12.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "d9ae7c4febfbfa845f72983641638d0c" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.13.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "801602b8f835e77ad963bb2bad3223d6" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.12.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.12.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.13.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.13.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.13.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.13.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "2842aa4a65553d150c11e4005d37dc41" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.13.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.13.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.13.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.13.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.14.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "1b6b1f2b9d7399ad3e9d00cf3693350a" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.14.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "19884c683d799d904c43a829108c2cb4" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.14.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.14.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.14.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.14.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.14.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "cdd36d8b8feb237fbc9a7742e2be2000" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.15.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bb8d946b090787e8eb729fdcb7d935f6" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.14.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.14.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.15.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.15.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.15.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.15.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "d9284e74e6c13f29ac6d5aa22688a362" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.15.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.15.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.15.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.15.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.16.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "16f758eaf98ead8677acb837f506a31c" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.16.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4793f52a52233d8157c20aae7c962956" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.16.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.16.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.16.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.16.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.16.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "cf81c45ae06440f51642082bb8b3b75b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.17.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0501ba9ece1843d14bea049cbc18fab4" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.16.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.16.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.17.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.17.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.17.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.17.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "e20a633da091d7ffb66994b3f947e327" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.17.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.17.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.17.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.17.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.18.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "b608b524e50b88afbbe3b979745cbdc5" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.18.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cce5d16eb6996bea8e01805fc728a155" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.18.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.18.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.18.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.18.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.18.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "f69334ef43c202fb4e7dcb7a9a9998b7" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.19.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "10a14a8f06975580c23775b4fafc13c4" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.18.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.18.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.19.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.19.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.19.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.19.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "cf2459819e1610434cac78acac1433b3" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.19.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.19.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.19.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.19.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.2.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "8939db93389884a3bd25197fa754f29a" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.2.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e2979771e8beb782b6fd5ee5e6540766" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.2.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.2.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.2.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.2.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.2.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "aae8c3c0cbf787854f88778405781c2a" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "language_model.transformer.h.2.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.2.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.20.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.20.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 } ], "md5sum": "32a4a69a99c27777f0c703d77e33a61f" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.3.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "86f3842e40b85b7a721c22c2ccb265fe" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.20.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.20.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.3.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.3.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.3.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.3.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "fef5dabd82727209c2724b306b8c75b2" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.3.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.3.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.3.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.3.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.4.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "5cf2675da69741ad745b679ba2cf9079" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.4.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7e2899b321277a06c9273d4d7032e9c1" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.4.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.4.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.4.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.4.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.4.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "2b1ba6b121fd8d5b6142addcf6b3f5f3" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.5.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1dcf802828a044e86010484d670428fe" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.4.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.4.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.5.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.5.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.5.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.5.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "43f68ceadf6b5e7d26494d9b6d26c624" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.5.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.5.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.5.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.5.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.6.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "f6f4060bdfc2822696b690675054afcc" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.6.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "28cde7bfa16f8205bc9fcf9c770f7536" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.6.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.6.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.6.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.6.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.6.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "4c4ea79de79db11b2ceb539686ae6055" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.7.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f3570c26b41c2be437d09722f7e01890" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.6.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.6.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.7.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.7.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.7.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.7.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "d4d69af860a2768453ef7ec582612b99" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "language_model.transformer.h.7.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.7.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.7.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.7.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "language_model.transformer.h.8.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "33c6c4ac362ee57bcb2f47c8cbf4ddfe" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.8.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3495dec20eb348a8a904ed50429906c5" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "language_model.transformer.h.8.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "language_model.transformer.h.8.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "language_model.transformer.h.8.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "language_model.transformer.h.8.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "language_model.transformer.h.8.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "95cb76cd55cd7b6594c1255ba16ae6d1" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "language_model.transformer.h.9.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bd890b1435fe9da6e875b1189fa3ba74" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "language_model.transformer.h.8.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "language_model.transformer.h.8.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "language_model.transformer.h.9.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "language_model.transformer.h.9.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "language_model.transformer.h.9.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "language_model.transformer.h.9.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "language_model.transformer.h.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "c737149da70d99bfa8724f19f71187b7" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 33161328, "records": [ { "name": "language_model.transformer.h.9.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "language_model.transformer.h.9.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "language_model.transformer.h.9.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "language_model.transformer.h.9.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "multi_modal_projector.linear_1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 }, { "name": "multi_modal_projector.linear_1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 21239808 }, { "name": "multi_modal_projector.linear_1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 22419456 }, { "name": "multi_modal_projector.linear_2.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 22566912 }, { "name": "multi_modal_projector.linear_2.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22573056 }, { "name": "multi_modal_projector.linear_2.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 27291648 }, { "name": "vision_tower.vision_model.embeddings.class_embedding", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27881472 }, { "name": "vision_tower.vision_model.embeddings.patch_embedding.weight", "shape": [ 768, 3, 16, 16 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 27883008 }, { "name": "vision_tower.vision_model.embeddings.position_embedding.q_weight", "shape": [ 197, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75648, "byteOffset": 29062656 }, { "name": "vision_tower.vision_model.embeddings.position_embedding.q_scale", "shape": [ 197, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9456, "byteOffset": 29138304 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29147760 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29149296 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29150832 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29152368 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 29153904 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 29160048 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 30339696 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30487152 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_weight", "shape": [ 384, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 30488688 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_scale", "shape": [ 96, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 31668336 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 31815792 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 31817328 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 32112240 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32149104 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32150640 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 32445552 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32482416 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32483952 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 32778864 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32815728 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32817264 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 33112176 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33149040 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33150576 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33152112 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33153648 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33155184 } ], "md5sum": "b65ae859360e9214dda5ec266d00812f" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 33338880, "records": [ { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 1179648 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1327104 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_weight", "shape": [ 384, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 1328640 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_scale", "shape": [ 96, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 2508288 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2655744 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 2657280 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 2952192 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2989056 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 2990592 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 3285504 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3322368 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 3323904 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 3618816 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3655680 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 3657216 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 3952128 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3988992 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3990528 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3992064 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3993600 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3995136 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 4001280 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 5180928 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5328384 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_weight", "shape": [ 384, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 5329920 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_scale", "shape": [ 96, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 6509568 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6657024 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 6658560 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 6953472 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6990336 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 6991872 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 7286784 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7323648 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 7325184 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 7620096 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7656960 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 7658496 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 7953408 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7990272 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7991808 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7993344 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 7994880 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 7996416 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 8002560 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 9182208 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9329664 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_weight", "shape": [ 384, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9331200 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_scale", "shape": [ 96, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10510848 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10658304 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10659840 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 10954752 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10991616 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10993152 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 11288064 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11324928 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 11326464 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 11621376 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11658240 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 11659776 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 11954688 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11991552 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11993088 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11994624 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 11996160 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 11997696 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 12003840 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 13183488 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 13330944 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_weight", "shape": [ 384, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 13332480 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_scale", "shape": [ 96, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 14512128 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14659584 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 14661120 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 14956032 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14992896 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 14994432 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 15289344 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15326208 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 15327744 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 15622656 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15659520 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 15661056 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 15955968 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15992832 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15994368 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15995904 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15997440 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15998976 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 16005120 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 17184768 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 17332224 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_weight", "shape": [ 384, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 17333760 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_scale", "shape": [ 96, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 18513408 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18660864 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 18662400 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 18957312 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18994176 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 18995712 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 19290624 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19327488 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 19329024 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 19623936 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19660800 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 19662336 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 19957248 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19994112 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19995648 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19997184 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19998720 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 20000256 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 20006400 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 21186048 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 21333504 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_weight", "shape": [ 384, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 21335040 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_scale", "shape": [ 96, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 22514688 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22662144 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 22663680 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 22958592 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22995456 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 22996992 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 23291904 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23328768 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 23330304 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 23625216 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23662080 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 23663616 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 23958528 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23995392 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23996928 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23998464 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24000000 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 24001536 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 24007680 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 25187328 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 25334784 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_weight", "shape": [ 384, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 25336320 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_scale", "shape": [ 96, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26515968 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 26663424 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 26664960 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 26959872 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 26996736 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 26998272 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 27293184 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27330048 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 27331584 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 27626496 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27663360 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 27664896 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 27959808 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27996672 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27998208 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27999744 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28001280 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28002816 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 28008960 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 29188608 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29336064 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_weight", "shape": [ 384, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 29337600 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_scale", "shape": [ 96, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 30517248 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30664704 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 30666240 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 30961152 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 30998016 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 30999552 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 31294464 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 31331328 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 31332864 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 31627776 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 31664640 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 31666176 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 31961088 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 31997952 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 31999488 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32001024 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32002560 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32004096 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 32010240 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 33189888 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33337344 } ], "md5sum": "4ef59e8f62bfc3bf40f11d27dcd5699c" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 10669056, "records": [ { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_weight", "shape": [ 384, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_scale", "shape": [ 96, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 1179648 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1327104 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 1328640 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 1623552 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1660416 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 1661952 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 1956864 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1993728 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 1995264 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 2290176 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2327040 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 2328576 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 2623488 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2660352 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2661888 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2663424 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 2664960 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 2666496 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 2672640 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 3852288 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3999744 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_weight", "shape": [ 384, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 4001280 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_scale", "shape": [ 96, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 5180928 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5328384 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 5329920 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 5624832 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5661696 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 5663232 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 5958144 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 5995008 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 5996544 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 6291456 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6328320 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 6329856 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 6624768 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6661632 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6663168 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6664704 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6666240 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 6667776 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_weight", "shape": [ 96, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 6673920 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 7853568 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 8001024 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_weight", "shape": [ 384, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 8002560 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_scale", "shape": [ 96, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 9182208 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9329664 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9331200 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 9626112 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9662976 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9664512 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 9959424 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9996288 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9997824 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 10292736 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10329600 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_weight", "shape": [ 96, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10331136 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_scale", "shape": [ 24, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 10626048 }, { "name": "vision_tower.vision_model.post_layernorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10662912 }, { "name": "vision_tower.vision_model.post_layernorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10664448 }, { "name": "vision_tower.vision_model.pre_layrnorm.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10665984 }, { "name": "vision_tower.vision_model.pre_layrnorm.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10667520 } ], "md5sum": "f14e04aef0b96f4ce0f53e11084c3987" } ] }