DeepSeek-V2-Lite-Chat-q4f16_1-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
e19ead7 verified
{
"metadata": {
"ParamSize": 540,
"ParamBytes": 8839977984.0,
"BitsPerParam": 4.502587776068809
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
102400,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "6dd82b8c2aaad6807db3066b0112b335"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
102400,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "6649ef6142a95b8d43b5c476cccb7ad2"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 31601664,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
102400,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 13107200
},
{
"name": "lm_head.q_scale",
"shape": [
102400,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13111296
},
{
"name": "model.layers.0.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 26218496
},
{
"name": "model.layers.0.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 29364224
},
{
"name": "model.layers.0.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 29757440
},
{
"name": "model.layers.0.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 30347264
},
{
"name": "model.layers.0.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 30420992
},
{
"name": "model.layers.0.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 30422016
},
{
"name": "model.layers.0.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 31470592
}
],
"md5sum": "998db361280249541be759f27a5d7df1"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 27574272,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 2097152
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
21888,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22413312,
"byteOffset": 2359296
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
21888,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2801664,
"byteOffset": 24772608
}
],
"md5sum": "d049c31f5061a2426d947d6029140830"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.1.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "e99431c2ae18ece1d3576ef4be726ff7"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.1.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "5baf7ad90f8ea1a45aa0f5fe9893480b"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.1.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "943a1f4e115f0c93838eb6ecad5659a0"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 30352384,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2048,
1368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11206656,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2048,
342
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1400832,
"byteOffset": 11206656
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 12607488
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 12611584
},
{
"name": "model.layers.1.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 12615680
},
{
"name": "model.layers.1.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 15761408
},
{
"name": "model.layers.1.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 16154624
},
{
"name": "model.layers.1.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 16744448
},
{
"name": "model.layers.1.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 16818176
},
{
"name": "model.layers.1.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 16819200
},
{
"name": "model.layers.1.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 17867776
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 17998848
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20096000
},
{
"name": "model.layers.1.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20358144
},
{
"name": "model.layers.1.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 20620288
},
{
"name": "model.layers.1.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 26387456
},
{
"name": "model.layers.1.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 27108352
},
{
"name": "model.layers.1.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 29991936
}
],
"md5sum": "c7b6a255f6f8b0461a4bbc26b9654668"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.2.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "6fe453f4f77ec11c2595a5538230424a"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.2.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "32c707e0566a49124ca78acca06e51b7"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.2.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "d72738a5ae536e58e35378156cde7290"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.1.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.2.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.2.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.2.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.2.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.2.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.2.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.2.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.2.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.2.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.2.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.2.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.2.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "d7a9eca09d2dadc13a8d445066841600"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.3.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "86e74f3b39cab2ff5595d9d25e18bff2"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.3.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "a6b74013ffe5177555f93028f39204c8"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.3.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "70da319aecc57bb835a5fd3f918b11ac"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.2.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.3.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.3.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.3.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.3.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.3.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.3.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.3.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.3.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.3.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.3.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.3.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.3.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "1a74c9967d8fe4dc2e67ec74189bee80"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.4.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "21e2e15f85f6ff03a7e073d9236bd13c"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.4.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "9f0a1668c61ae3b9bf262e7501f527df"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.4.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "8e7f127c4e5b8e01367ea4862cb264d5"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.3.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.4.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.4.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.4.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.4.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.4.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.4.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.4.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.4.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.4.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.4.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.4.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.4.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "9f4e6764f4a03b89210cf8f9409afc7c"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.5.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "e95c527175c7d73812897dfc81fd2062"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.5.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "f7224d16114a32b532f289cface91e9f"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.5.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "0decee8cc1a9fb1d3fc56e10866cc486"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.4.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.5.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.5.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.5.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.5.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.5.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.5.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.5.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.5.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.5.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.5.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.5.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.5.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "e1b02f51cc82e144651ea48ee7c9b920"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.6.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "e9f59180fe097225ccb3822a56bdb575"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.6.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "ed4caeda7ca259ecd7c1c23a4fba256c"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.6.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "5e77522c7a5696bb82181dd3f09ee192"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.5.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.6.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.6.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.6.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.6.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.6.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.6.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.6.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.6.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.6.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.6.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.6.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.6.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "37b57397fe736352f8c89f630a3be793"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.7.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "0dfc941c0c288cf9aee63d8b4c01d337"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.7.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "6fc55fb006326f41f3d4b32dbf59892e"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.7.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "9ab6c7a95b69a8092d0cbc027c8767c4"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.6.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.7.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.7.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.7.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.7.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.7.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.7.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.7.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.7.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.7.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.7.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.7.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.7.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "8081e9008497dddfa99d3ad42583d92d"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.8.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "e21f8d243c818993c9237719b243aee9"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.8.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "f1de4bd332c76d0d856fc4c4e08392de"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.8.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "227c5520920fed32482f4f8f22544a74"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.7.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.8.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.8.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.8.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.8.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.8.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.8.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.8.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.8.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.8.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.8.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.8.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.8.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "3150af85be95b9adf5da029595492f55"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.9.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "73f003eab48a6f237c6db1803c1363e6"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.9.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "d8e4a2fc400eeaa563aff31a1ba5ecd4"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.9.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "24a14f558861e2b18e76aa286a7ae340"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.8.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.9.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.9.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.9.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.9.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.9.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.9.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.9.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.9.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.9.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.9.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.9.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.9.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "62b941c967a4983e0d1ba3eaf2945958"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.10.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "a286f5a5700cc200a22c15ed432b3a15"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.10.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "1c7471b8b884678214e16b6b8897dd74"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.10.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "4666d9cead0c1d59e4587d0b09cdad05"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.9.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.10.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.10.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.10.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.10.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.10.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.10.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.10.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.10.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.10.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.10.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.10.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.10.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "dcbd707988c939bfa811911b7cc534e2"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.11.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "5602565f33bf51226ee708fcf61ee362"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.11.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "ced262a95e26e0afb5e771971a23625b"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.11.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "28cf52b12a30ec62fca0b475417c7b6f"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.10.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.11.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.11.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.11.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.11.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.11.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.11.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.11.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.11.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.11.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.11.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.11.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.11.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "67eb5cd00764a073195f3d08001e0dca"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.12.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "4476b19375d687aa89cccebbf62a42e2"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.12.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "63c7cce189cf49e4be0041024b8986e6"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.12.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "c05495177c3e040702e5931a01217e7e"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.11.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.12.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.12.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.12.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.12.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.12.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.12.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.12.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.12.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.12.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.12.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.12.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.12.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "57904b2840903763fc322de273395ac6"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.13.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "34969bb0fe0ed23c7911d9567b46a95c"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.13.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "28e083f3303b49b4cc70def937227ac7"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.13.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "c3fd4b4a25496c268a611a3c08efc6bc"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.12.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.13.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.13.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.13.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.13.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.13.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.13.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.13.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.13.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.13.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.13.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.13.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.13.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "82759502daa0420629e476592346a5e5"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.14.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "c4067db6ad8cd738d3da86000d35b236"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.14.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "180cec11f2136cd611bde37e3eead8c7"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.14.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "36fdfd2be7b99b9be27be97a3c452c01"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.13.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.14.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.14.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.14.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.14.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.14.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.14.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.14.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.14.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.14.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.14.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.14.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.14.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "1709f62812e963176c533ac70be1ab4c"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.15.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "30d01f8d167768ad62f586d612a46658"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.15.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "994889c991cf21bf8eaee6a9c8b26c59"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.15.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "7183a2a971ad10a6509801fd1835a6e7"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.14.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.15.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.15.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.15.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.15.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.15.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.15.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.15.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.15.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.15.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.15.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.15.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.15.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "f69b7c426bcc3545f9450f70989fbb91"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.16.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "943a5584db1f74dc1e5b3e930709cbf5"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.16.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "48190e96d7ff0de7b30b39a0340e6f45"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.16.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "08e8e4cbb4004727c5aba3e854aa58b4"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.15.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.16.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.16.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.16.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.16.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.16.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.16.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.16.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.16.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.16.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.16.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.16.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.16.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "fa5a8ca4c5f934833d3d50563c7c4032"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.17.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "d2007a105ea7937414a4778c7c4c8fa4"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.17.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "0ce4b2e3746d3311090ae35d3170cd70"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.17.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "81988ef9860652402ac7c68ee4510eca"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.16.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.17.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.17.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.17.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.17.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.17.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.17.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.17.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.17.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.17.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.17.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.17.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.17.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "5b5da37fd1b677a9ca60823a71dd7a12"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.18.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "a3b809b557c8350965a691a2906b7ba9"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.18.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "0b7216963c2cbb112cb078c6058ff78c"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.18.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "f01d45d809d40f22dabd2fb5e0278069"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.17.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.18.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.18.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.18.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.18.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.18.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.18.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.18.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.18.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.18.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.18.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.18.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.18.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "02cd8d883d144a9281770aae15bd5118"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.19.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "8d3b7c34f7a0dbf4a8f605ae2cee089a"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.19.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "ee759a1c7553a9f2c01c42985bf6e716"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.19.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "57f8c3ce1c6c77ac4c865698317720ed"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.18.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.19.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.19.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.19.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.19.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.19.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.19.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.19.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.19.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.19.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.19.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.19.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.19.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "8e820e83bd55f395a6fb0f9b18aefc72"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.20.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "fb02f81519dfb73c1f846c562cc15736"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.20.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "2e4434b9933c8378b87871c591238bff"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.20.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "0b9ca7bc749c4e20b16d1bb078eacb04"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.19.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.20.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.20.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.20.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.20.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.20.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.20.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.20.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.20.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.20.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.20.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.20.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.20.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "f52d7d349614ecdcda7958c9360a41d5"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.21.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "57e210bad0d69abb861e716d477c7b51"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.21.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "90ff7999e76802222b3503e525a2c86c"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.21.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "8d47b2bdfecc99d9b7829147c72f776a"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.20.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.21.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.21.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.21.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.21.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.21.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.21.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.21.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.21.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.21.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.21.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.21.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.21.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "6cca1b666844411769c117973dba50c9"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.22.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "b9f959123162cbc11676425c105c7fd7"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.22.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "61bb52652c1a3f68d2cd9c4e6c95191a"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.22.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "52fdb442b10c2f8b5666b19942c013df"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.21.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.22.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.22.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.22.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.22.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.22.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.22.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.22.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.22.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.22.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.22.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.22.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.22.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "066d4bcb474f3586b2c62abb0e301e34"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.23.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "c60d8702e2d0035f86622296b6093c6f"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.23.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "0f26cea315a64866e5b590975d5dbcf5"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.23.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "622f13522a4c25b5b020d314267bf586"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.22.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.23.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.23.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.23.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.23.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.23.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.23.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.23.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.23.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.23.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.23.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.23.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.23.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "6d43132d14e5bb81cdf0057f7b5d6d07"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.24.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "fa3da57a55832677bba5c4518ffa57f0"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.24.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "53df55a2cd41b7e75ad5f887034bc61c"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.24.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "e8279e9628bb18467f20e897b56ce31b"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.23.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.24.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.24.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.24.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.24.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.24.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.24.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.24.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.24.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.24.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.24.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.24.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.24.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "51517afdd52e3365c4c8fc43e8248d99"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.25.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "3b183acdc50a04ef7bc6d5039cad6cf9"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.25.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "20b9475208c14a7fc3622456e8e0a443"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.25.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "359801fbf0f827f9ca297130f9618733"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.24.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.25.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.25.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.25.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.25.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.25.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.25.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.25.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.25.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.25.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.25.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.25.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.25.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "0df182c720f50ef98afe4d2db580c43c"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.26.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "6390d63d37241acbb38cddd8273ce0d5"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.26.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "804d59b8afc33c4b676d5e4e7d019dd2"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.26.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "7a60324864cc1f63b857518b83f253b7"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.25.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.26.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.26.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.26.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.26.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.26.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.26.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.26.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.26.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.26.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.26.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.26.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.26.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "359f4392376b7872d17236156dc197f0"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 11542528,
"records": [
{
"name": "model.layers.26.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
}
],
"md5sum": "4edf8d458365bfdc46d0481525165101"
}
]
}