|
{ |
|
"metadata": { |
|
"ParamSize": 540, |
|
"ParamBytes": 8839977984.0, |
|
"BitsPerParam": 4.502587776068809 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 104857600, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
102400, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 104857600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6dd82b8c2aaad6807db3066b0112b335" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 104857600, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
102400, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 104857600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6649ef6142a95b8d43b5c476cccb7ad2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31601664, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
102400, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
102400, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 13111296 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 26218496 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 29364224 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 29757440 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 30347264 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 30420992 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 30422016 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 31470592 |
|
} |
|
], |
|
"md5sum": "998db361280249541be759f27a5d7df1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27574272, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 2097152 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
21888, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22413312, |
|
"byteOffset": 2359296 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
21888, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2801664, |
|
"byteOffset": 24772608 |
|
} |
|
], |
|
"md5sum": "d049c31f5061a2426d947d6029140830" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e99431c2ae18ece1d3576ef4be726ff7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5baf7ad90f8ea1a45aa0f5fe9893480b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "943a1f4e115f0c93838eb6ecad5659a0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30352384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11206656, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
342 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1400832, |
|
"byteOffset": 11206656 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 12607488 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 12611584 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 12615680 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 15761408 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 16154624 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 16744448 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 16818176 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 16819200 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 17867776 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 17998848 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 20096000 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 20358144 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 20620288 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 26387456 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 27108352 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 29991936 |
|
} |
|
], |
|
"md5sum": "c7b6a255f6f8b0461a4bbc26b9654668" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6fe453f4f77ec11c2595a5538230424a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "32c707e0566a49124ca78acca06e51b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d72738a5ae536e58e35378156cde7290" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "d7a9eca09d2dadc13a8d445066841600" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86e74f3b39cab2ff5595d9d25e18bff2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a6b74013ffe5177555f93028f39204c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "70da319aecc57bb835a5fd3f918b11ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "1a74c9967d8fe4dc2e67ec74189bee80" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "21e2e15f85f6ff03a7e073d9236bd13c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f0a1668c61ae3b9bf262e7501f527df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e7f127c4e5b8e01367ea4862cb264d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "9f4e6764f4a03b89210cf8f9409afc7c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e95c527175c7d73812897dfc81fd2062" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7224d16114a32b532f289cface91e9f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0decee8cc1a9fb1d3fc56e10866cc486" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "e1b02f51cc82e144651ea48ee7c9b920" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9f59180fe097225ccb3822a56bdb575" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ed4caeda7ca259ecd7c1c23a4fba256c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5e77522c7a5696bb82181dd3f09ee192" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "37b57397fe736352f8c89f630a3be793" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0dfc941c0c288cf9aee63d8b4c01d337" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6fc55fb006326f41f3d4b32dbf59892e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9ab6c7a95b69a8092d0cbc027c8767c4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "8081e9008497dddfa99d3ad42583d92d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e21f8d243c818993c9237719b243aee9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1de4bd332c76d0d856fc4c4e08392de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "227c5520920fed32482f4f8f22544a74" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "3150af85be95b9adf5da029595492f55" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "73f003eab48a6f237c6db1803c1363e6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d8e4a2fc400eeaa563aff31a1ba5ecd4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "24a14f558861e2b18e76aa286a7ae340" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "62b941c967a4983e0d1ba3eaf2945958" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a286f5a5700cc200a22c15ed432b3a15" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1c7471b8b884678214e16b6b8897dd74" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4666d9cead0c1d59e4587d0b09cdad05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "dcbd707988c939bfa811911b7cc534e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5602565f33bf51226ee708fcf61ee362" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ced262a95e26e0afb5e771971a23625b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "28cf52b12a30ec62fca0b475417c7b6f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "67eb5cd00764a073195f3d08001e0dca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4476b19375d687aa89cccebbf62a42e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "63c7cce189cf49e4be0041024b8986e6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c05495177c3e040702e5931a01217e7e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "57904b2840903763fc322de273395ac6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "34969bb0fe0ed23c7911d9567b46a95c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "28e083f3303b49b4cc70def937227ac7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3fd4b4a25496c268a611a3c08efc6bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "82759502daa0420629e476592346a5e5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c4067db6ad8cd738d3da86000d35b236" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "180cec11f2136cd611bde37e3eead8c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "36fdfd2be7b99b9be27be97a3c452c01" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "1709f62812e963176c533ac70be1ab4c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "30d01f8d167768ad62f586d612a46658" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "994889c991cf21bf8eaee6a9c8b26c59" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7183a2a971ad10a6509801fd1835a6e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "f69b7c426bcc3545f9450f70989fbb91" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "943a5584db1f74dc1e5b3e930709cbf5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48190e96d7ff0de7b30b39a0340e6f45" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08e8e4cbb4004727c5aba3e854aa58b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "fa5a8ca4c5f934833d3d50563c7c4032" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d2007a105ea7937414a4778c7c4c8fa4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0ce4b2e3746d3311090ae35d3170cd70" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "81988ef9860652402ac7c68ee4510eca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "5b5da37fd1b677a9ca60823a71dd7a12" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a3b809b557c8350965a691a2906b7ba9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b7216963c2cbb112cb078c6058ff78c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f01d45d809d40f22dabd2fb5e0278069" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "02cd8d883d144a9281770aae15bd5118" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8d3b7c34f7a0dbf4a8f605ae2cee089a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee759a1c7553a9f2c01c42985bf6e716" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "57f8c3ce1c6c77ac4c865698317720ed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "8e820e83bd55f395a6fb0f9b18aefc72" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fb02f81519dfb73c1f846c562cc15736" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2e4434b9933c8378b87871c591238bff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b9ca7bc749c4e20b16d1bb078eacb04" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "f52d7d349614ecdcda7958c9360a41d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "57e210bad0d69abb861e716d477c7b51" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "90ff7999e76802222b3503e525a2c86c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8d47b2bdfecc99d9b7829147c72f776a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "6cca1b666844411769c117973dba50c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9f959123162cbc11676425c105c7fd7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "61bb52652c1a3f68d2cd9c4e6c95191a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "52fdb442b10c2f8b5666b19942c013df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "066d4bcb474f3586b2c62abb0e301e34" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c60d8702e2d0035f86622296b6093c6f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f26cea315a64866e5b590975d5dbcf5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "622f13522a4c25b5b020d314267bf586" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "6d43132d14e5bb81cdf0057f7b5d6d07" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fa3da57a55832677bba5c4518ffa57f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "53df55a2cd41b7e75ad5f887034bc61c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e8279e9628bb18467f20e897b56ce31b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "51517afdd52e3365c4c8fc43e8248d99" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3b183acdc50a04ef7bc6d5039cad6cf9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "20b9475208c14a7fc3622456e8e0a443" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "359801fbf0f827f9ca297130f9618733" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "0df182c720f50ef98afe4d2db580c43c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6390d63d37241acbb38cddd8273ce0d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "804d59b8afc33c4b676d5e4e7d019dd2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7a60324864cc1f63b857518b83f253b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "359f4392376b7872d17236156dc197f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 11542528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
} |
|
], |
|
"md5sum": "4edf8d458365bfdc46d0481525165101" |
|
} |
|
] |
|
} |