DeepSeek-V2-Lite-Chat-q4f32_1-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
f492e43 verified
{
"metadata": {
"ParamSize": 540,
"ParamBytes": 9828481024.0,
"BitsPerParam": 5.006075648161553
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
102400,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "aecba7f129dff063694b75eb9df73aef"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 104857600,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
102400,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 104857600,
"byteOffset": 0
}
],
"md5sum": "9c777fb06365103e905464b96ab47072"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 31601664,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
102400,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 13107200
},
{
"name": "lm_head.q_scale",
"shape": [
102400,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13111296
},
{
"name": "model.layers.0.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 26218496
},
{
"name": "model.layers.0.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 29364224
},
{
"name": "model.layers.0.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 29757440
},
{
"name": "model.layers.0.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 30347264
},
{
"name": "model.layers.0.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 30420992
},
{
"name": "model.layers.0.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 30422016
},
{
"name": "model.layers.0.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 31470592
}
],
"md5sum": "3edbb7d4fea234666177e0002d8012e1"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 27574272,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 2097152
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
21888,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22413312,
"byteOffset": 2359296
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
21888,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2801664,
"byteOffset": 24772608
}
],
"md5sum": "5c550c39b3503f125dc0e8939f6db19e"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.1.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "2cb32a075e79c2e7977117d320c7158a"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.1.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "e119fa8ffb9e4331d8e160b7787551d9"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.1.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "130e1f34c853f056a36481565657501f"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 30352384,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2048,
1368
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 11206656,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2048,
342
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1400832,
"byteOffset": 11206656
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 12607488
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 12611584
},
{
"name": "model.layers.1.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 12615680
},
{
"name": "model.layers.1.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 15761408
},
{
"name": "model.layers.1.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 16154624
},
{
"name": "model.layers.1.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 16744448
},
{
"name": "model.layers.1.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 16818176
},
{
"name": "model.layers.1.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 16819200
},
{
"name": "model.layers.1.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 17867776
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 17998848
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20096000
},
{
"name": "model.layers.1.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 20358144
},
{
"name": "model.layers.1.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 20620288
},
{
"name": "model.layers.1.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 26387456
},
{
"name": "model.layers.1.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 27108352
},
{
"name": "model.layers.1.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 29991936
}
],
"md5sum": "b44c15db5d762e1e20c897ce21ab125c"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.2.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "bf13ebf4204d41ac2c696d2746cb8109"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.2.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "c4fe0af0dd214a94b671e4fa54c53199"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.2.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "a5858f0200fec84b0f14556c372a0380"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.1.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.2.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.2.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.2.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.2.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.2.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.2.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.2.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.2.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.2.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.2.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.2.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.2.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "5a1448ed18f2f0d9c4e15ca551cbbf8b"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.3.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "8e265f4994f7b62590c5e6f0081359cb"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.3.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "6836686ab66e573994f265475180fec2"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.3.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "1963f0f3e0b5420326fa62c238c6c937"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.2.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.3.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.3.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.3.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.3.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.3.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.3.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.3.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.3.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.3.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.3.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.3.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.3.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "04724f86db655f3a1c1205c2bd5a62cd"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.4.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "2dcb861888a6ef52f333dcee6afa955f"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.4.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "241a231e639e927c55bb30a338ecb319"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.4.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "9ee7b1730034f7d27a08ad750c633faa"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.3.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.4.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.4.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.4.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.4.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.4.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.4.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.4.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.4.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.4.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.4.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.4.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.4.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "2ef24a770fc746dceaefbefa282dc530"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.5.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "3a225c76960137d8241b870ebe4726bd"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.5.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "7808ba6cd5d693d41b62fb7db932c2be"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.5.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "5425fd0fd748c4f8899f0a4c0d02060c"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.4.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.5.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.5.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.5.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.5.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.5.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.5.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.5.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.5.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.5.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.5.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.5.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.5.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "12f2aad47ba38fea275645c8f952ca35"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.6.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "2c5200430f16add6e4d28373b8eb5042"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.6.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "7df4cef8e3c4fa2b8bc082389d0e416a"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.6.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "db0c6dcd77691f83cd499c3394c6d503"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.5.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.6.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.6.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.6.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.6.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.6.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.6.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.6.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.6.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.6.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.6.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.6.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.6.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "d55c4cc540d9aad19a052efaaa1a1de6"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.7.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "341e0ea06cbb6260bc0150a7ef58719e"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.7.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "e4105bb9a01df049f1df6f9f4b8b84e1"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.7.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "7376bb84a86d5815102a7b8c2c52e4ee"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.6.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.7.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.7.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.7.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.7.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.7.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.7.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.7.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.7.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.7.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.7.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.7.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.7.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "42739b8922ca745d33d7e07e3375d857"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.8.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "d0b521b707b9eaf7780a9b9b5864708d"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.8.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "83e8bd7d70cc18b7696745b343922231"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.8.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "0b37805532160a011505e7ac5a99b5bc"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.7.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.8.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.8.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.8.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.8.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.8.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.8.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.8.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.8.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.8.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.8.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.8.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.8.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "2cdca14a871e54eb7fecb5bba21a7bc1"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.9.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "0f513f9a0d03775b32821402934b7676"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.9.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "d3b9afb8b1f8460fe9af2f496d0cf6bb"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.9.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "5dbd8536bf1148115653b0bc436fd155"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.8.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.9.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.9.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.9.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.9.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.9.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.9.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.9.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.9.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.9.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.9.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.9.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.9.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "feecefb1c56bf343ffe3714060c27550"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.10.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "d16eaba8c42bdf5d2bfd17dd51e95659"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.10.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "dafb0ac8d6a35846a4cf4b237f71d2d6"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.10.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "9df235058e7f7d718a6eaa1db5764e53"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.9.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.10.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.10.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.10.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.10.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.10.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.10.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.10.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.10.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.10.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.10.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.10.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.10.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "40baf16d40e0fe719d6c90e13435bb56"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.11.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "4a5e3d29c874445c2b1e691799703552"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.11.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "f7d823c624b6069389338731955626d9"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.11.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "6b4e8b74259e5cfc71ec8cac9ce88e81"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.10.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.11.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.11.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.11.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.11.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.11.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.11.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.11.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.11.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.11.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.11.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.11.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.11.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "6efdb660233d84db84220a0df941207e"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.12.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "ec3c62c29a5c880dc1676b6136911932"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.12.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "f786eedd3709cb65e05567c7b1c2e968"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.12.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "93dc1b693e9442e9d1802297bc9d8080"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.11.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.12.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.12.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.12.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.12.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.12.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.12.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.12.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.12.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.12.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.12.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.12.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.12.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "f5cf59fe827ba499af3622ef68b53104"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.13.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "4c4d6cb1461facc9765a268ba04fa6ee"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.13.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "641ebb59a0d5426d628c8c9c5ca64361"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.13.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "bd6316e9a7e6f95bfa26a2d3f9ccd2e3"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.12.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.13.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.13.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.13.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.13.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.13.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.13.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.13.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.13.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.13.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.13.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.13.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.13.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "360346127e1a16e9dff6db2ae9055787"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.14.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "9a2ca6638a396fbb824388bb7a4ed793"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.14.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "b832d5df54d210efacd31fd310915b31"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.14.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "dc976e7be7cfaf6325e789a1af2ebe3e"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.13.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.14.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.14.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.14.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.14.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.14.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.14.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.14.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.14.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.14.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.14.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.14.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.14.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "683f47a8fc4093d4a015a0541b2be473"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.15.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "5c823c361d0272b251f5ffb006c6fa4a"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.15.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "1300ab997707e3bac452b549f6f1b791"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.15.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "daf7b029dac6a8ca4f625e1643d2eeed"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.14.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.15.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.15.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.15.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.15.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.15.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.15.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.15.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.15.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.15.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.15.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.15.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.15.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "7e38de5399b867c755340e40361477d8"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.16.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "7683e954027028f578b46e774f2e4e0d"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.16.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "c30b17863a3bc75384e74e9c07cd6d1d"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.16.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "7df0c8975e0966abcaec8a6d8dc3666e"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.15.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.16.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.16.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.16.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.16.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.16.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.16.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.16.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.16.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.16.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.16.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.16.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.16.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "7f16dd445dfb75203d04689a8de74733"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.17.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "72458d312469b9c35a845ad0ea760977"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.17.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "4da660ec13d5680d859634572e3a790b"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.17.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "7823fda41ec073e680b3f4a3b1d9b2bf"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.16.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.17.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.17.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.17.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.17.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.17.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.17.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.17.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.17.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.17.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.17.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.17.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.17.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "c77169eca0567ead25d2b4f807f85eb6"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.18.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "c3e81e764b5456e48a603942b80274a6"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.18.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "23411bbfabdec689846a2cd064ec08f6"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.18.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "358262ba79ac0495b92f7e6fcb5e2ae7"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.17.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.18.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.18.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.18.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.18.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.18.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.18.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.18.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.18.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.18.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.18.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.18.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.18.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "87a3f84d55f0964a343e6d255c30a29b"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.19.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "e1cc38cb3dec8a8fb8fa08af72eae37c"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.19.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "45a0f508d0bb33224b060fdde2fab9fd"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.19.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "dbbeb4832b7529767841dda7eb703467"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.18.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.19.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.19.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.19.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.19.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.19.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.19.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.19.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.19.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.19.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.19.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.19.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.19.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "cb00bc5bc69031f2baa0a818593a6308"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.20.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "89040711e6aa2b61e405e0b977f0e5c7"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.20.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "8da6d8dc34dcfc03c6c08905cfe79437"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.20.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "da5cf3a28182036c810447738a13a66b"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.19.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.20.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.20.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.20.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.20.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.20.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.20.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.20.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.20.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.20.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.20.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.20.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.20.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "bd0bc40aa2beaa94a66609ba234eb98a"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.21.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "6fe4ea05e1d514efc23566ad8897c1a4"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.21.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "edb10d3a0362259c844abd681f00b57f"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.21.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "8cf790f09998c2ee8726dadbee27776d"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.20.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.21.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.21.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.21.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.21.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.21.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.21.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.21.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.21.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.21.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.21.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.21.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.21.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "8f9356ac0ce5c7a19349c4090f59fb30"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.22.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "e68534b8ad5519b90538aa42b53c043e"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.22.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "48e8b10ac27055abfffa65dcc62dcdff"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.22.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "be93840acabfc15bed8960afd9788afb"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.21.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.22.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.22.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.22.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.22.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.22.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.22.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.22.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.22.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.22.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.22.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.22.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.22.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "79215ec8e21962243d7840ddc1ce758f"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.23.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "db7acc0bacb9969ca6b7df2b96a81188"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.23.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "ddc3538ccdf293d97478b8cf1ae44c93"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.23.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "533d2d37d864f37693c672b4e168294f"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.22.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.23.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.23.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.23.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.23.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.23.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.23.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.23.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.23.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.23.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.23.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.23.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.23.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "240d7a194858bfbcd7faa56a87bcbeb5"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.24.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "033972268afd719b33a7b0d0c4ee69ea"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.24.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "ed51159afa8c766a3fc9a5632680060b"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.24.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "83233d1114a93cc13911a0d8c67dd07d"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.23.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.24.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.24.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.24.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.24.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.24.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.24.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.24.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.24.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.24.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.24.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.24.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.24.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "5a2280321bdb6cd01a8d0b1809a32c46"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.25.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "adcadc9e1b5aa18901949119936e60c8"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.25.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "0367e10b699e7d7629f0ca112235a940"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.25.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "b43ae0388648936b710472b2a9e3e431"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.24.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.25.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.25.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.25.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.25.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.25.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.25.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.25.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.25.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.25.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.25.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.25.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.25.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "8f4e4e60c9df90e548a0520c047af261"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 184549376,
"records": [
{
"name": "model.layers.26.mlp.moe_gate_up_proj.q_weight",
"shape": [
64,
2816,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 184549376,
"byteOffset": 0
}
],
"md5sum": "aeddb21991e6573caede2d7c68185015"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.26.mlp.moe_gate_up_proj.q_scale",
"shape": [
64,
2816,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "ec689d3391d173aace5532bf59515e50"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 92274688,
"records": [
{
"name": "model.layers.26.mlp.moe_down_proj.q_weight",
"shape": [
64,
2048,
176
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 92274688,
"byteOffset": 0
}
],
"md5sum": "f88861ef7b7936346d543a2d69b50ac8"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 29279232,
"records": [
{
"name": "model.layers.25.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
},
{
"name": "model.layers.26.self_attn.q_proj.q_weight",
"shape": [
3072,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11542528
},
{
"name": "model.layers.26.self_attn.q_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14688256
},
{
"name": "model.layers.26.self_attn.kv_a_proj_with_mqa.q_weight",
"shape": [
576,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 15081472
},
{
"name": "model.layers.26.self_attn.kv_a_proj_with_mqa.q_scale",
"shape": [
576,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 15671296
},
{
"name": "model.layers.26.self_attn.kv_a_layernorm.weight",
"shape": [
512
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1024,
"byteOffset": 15745024
},
{
"name": "model.layers.26.self_attn.kv_b_proj.q_weight",
"shape": [
4096,
64
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 15746048
},
{
"name": "model.layers.26.self_attn.kv_b_proj.q_scale",
"shape": [
4096,
16
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 16794624
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
2048,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 16925696
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
2048,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19022848
},
{
"name": "model.layers.26.mlp.gate.weight",
"shape": [
64,
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19284992
},
{
"name": "model.layers.26.mlp.shared_experts.gate_up_proj.q_weight",
"shape": [
5632,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 5767168,
"byteOffset": 19547136
},
{
"name": "model.layers.26.mlp.shared_experts.gate_up_proj.q_scale",
"shape": [
5632,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 720896,
"byteOffset": 25314304
},
{
"name": "model.layers.26.mlp.shared_experts.down_proj.q_weight",
"shape": [
2048,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 26035200
},
{
"name": "model.layers.26.mlp.shared_experts.down_proj.q_scale",
"shape": [
2048,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28918784
}
],
"md5sum": "f7a67a47df2a804e3296f0e30e5ff7a6"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 11542528,
"records": [
{
"name": "model.layers.26.mlp.moe_down_proj.q_scale",
"shape": [
64,
2048,
44
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 11534336,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11534336
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11538432
}
],
"md5sum": "b08e08a725e528c81d407433a0ada2a0"
}
]
}