|
{ |
|
"metadata": { |
|
"ParamSize": 540, |
|
"ParamBytes": 9828481024.0, |
|
"BitsPerParam": 5.006075648161553 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 104857600, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
102400, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 104857600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aecba7f129dff063694b75eb9df73aef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 104857600, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
102400, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 104857600, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9c777fb06365103e905464b96ab47072" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31601664, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
102400, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 13107200 |
|
}, |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
102400, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 13107200, |
|
"byteOffset": 13111296 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 26218496 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 29364224 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 29757440 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 30347264 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 30420992 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 30422016 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 31470592 |
|
} |
|
], |
|
"md5sum": "3edbb7d4fea234666177e0002d8012e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27574272, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 2097152 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
21888, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22413312, |
|
"byteOffset": 2359296 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
21888, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2801664, |
|
"byteOffset": 24772608 |
|
} |
|
], |
|
"md5sum": "5c550c39b3503f125dc0e8939f6db19e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2cb32a075e79c2e7977117d320c7158a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e119fa8ffb9e4331d8e160b7787551d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "130e1f34c853f056a36481565657501f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30352384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11206656, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
342 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1400832, |
|
"byteOffset": 11206656 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 12607488 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 12611584 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 12615680 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 15761408 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 16154624 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 16744448 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 16818176 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 16819200 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 17867776 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 17998848 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 20096000 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 20358144 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 20620288 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 26387456 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 27108352 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 29991936 |
|
} |
|
], |
|
"md5sum": "b44c15db5d762e1e20c897ce21ab125c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bf13ebf4204d41ac2c696d2746cb8109" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c4fe0af0dd214a94b671e4fa54c53199" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a5858f0200fec84b0f14556c372a0380" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "5a1448ed18f2f0d9c4e15ca551cbbf8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e265f4994f7b62590c5e6f0081359cb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6836686ab66e573994f265475180fec2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1963f0f3e0b5420326fa62c238c6c937" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "04724f86db655f3a1c1205c2bd5a62cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2dcb861888a6ef52f333dcee6afa955f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "241a231e639e927c55bb30a338ecb319" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9ee7b1730034f7d27a08ad750c633faa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "2ef24a770fc746dceaefbefa282dc530" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3a225c76960137d8241b870ebe4726bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7808ba6cd5d693d41b62fb7db932c2be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5425fd0fd748c4f8899f0a4c0d02060c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "12f2aad47ba38fea275645c8f952ca35" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2c5200430f16add6e4d28373b8eb5042" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7df4cef8e3c4fa2b8bc082389d0e416a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "db0c6dcd77691f83cd499c3394c6d503" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "d55c4cc540d9aad19a052efaaa1a1de6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "341e0ea06cbb6260bc0150a7ef58719e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e4105bb9a01df049f1df6f9f4b8b84e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7376bb84a86d5815102a7b8c2c52e4ee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "42739b8922ca745d33d7e07e3375d857" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d0b521b707b9eaf7780a9b9b5864708d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "83e8bd7d70cc18b7696745b343922231" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b37805532160a011505e7ac5a99b5bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "2cdca14a871e54eb7fecb5bba21a7bc1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f513f9a0d03775b32821402934b7676" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d3b9afb8b1f8460fe9af2f496d0cf6bb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5dbd8536bf1148115653b0bc436fd155" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "feecefb1c56bf343ffe3714060c27550" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d16eaba8c42bdf5d2bfd17dd51e95659" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dafb0ac8d6a35846a4cf4b237f71d2d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9df235058e7f7d718a6eaa1db5764e53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "40baf16d40e0fe719d6c90e13435bb56" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4a5e3d29c874445c2b1e691799703552" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7d823c624b6069389338731955626d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b4e8b74259e5cfc71ec8cac9ce88e81" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "6efdb660233d84db84220a0df941207e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ec3c62c29a5c880dc1676b6136911932" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f786eedd3709cb65e05567c7b1c2e968" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "93dc1b693e9442e9d1802297bc9d8080" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "f5cf59fe827ba499af3622ef68b53104" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4c4d6cb1461facc9765a268ba04fa6ee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "641ebb59a0d5426d628c8c9c5ca64361" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bd6316e9a7e6f95bfa26a2d3f9ccd2e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "360346127e1a16e9dff6db2ae9055787" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9a2ca6638a396fbb824388bb7a4ed793" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b832d5df54d210efacd31fd310915b31" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dc976e7be7cfaf6325e789a1af2ebe3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "683f47a8fc4093d4a015a0541b2be473" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5c823c361d0272b251f5ffb006c6fa4a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1300ab997707e3bac452b549f6f1b791" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "daf7b029dac6a8ca4f625e1643d2eeed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "7e38de5399b867c755340e40361477d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7683e954027028f578b46e774f2e4e0d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c30b17863a3bc75384e74e9c07cd6d1d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7df0c8975e0966abcaec8a6d8dc3666e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "7f16dd445dfb75203d04689a8de74733" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "72458d312469b9c35a845ad0ea760977" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4da660ec13d5680d859634572e3a790b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7823fda41ec073e680b3f4a3b1d9b2bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "c77169eca0567ead25d2b4f807f85eb6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3e81e764b5456e48a603942b80274a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23411bbfabdec689846a2cd064ec08f6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "358262ba79ac0495b92f7e6fcb5e2ae7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "87a3f84d55f0964a343e6d255c30a29b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e1cc38cb3dec8a8fb8fa08af72eae37c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "45a0f508d0bb33224b060fdde2fab9fd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dbbeb4832b7529767841dda7eb703467" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "cb00bc5bc69031f2baa0a818593a6308" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89040711e6aa2b61e405e0b977f0e5c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8da6d8dc34dcfc03c6c08905cfe79437" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "da5cf3a28182036c810447738a13a66b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "bd0bc40aa2beaa94a66609ba234eb98a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6fe4ea05e1d514efc23566ad8897c1a4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "edb10d3a0362259c844abd681f00b57f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8cf790f09998c2ee8726dadbee27776d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "8f9356ac0ce5c7a19349c4090f59fb30" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e68534b8ad5519b90538aa42b53c043e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48e8b10ac27055abfffa65dcc62dcdff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be93840acabfc15bed8960afd9788afb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "79215ec8e21962243d7840ddc1ce758f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "db7acc0bacb9969ca6b7df2b96a81188" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ddc3538ccdf293d97478b8cf1ae44c93" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "533d2d37d864f37693c672b4e168294f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "240d7a194858bfbcd7faa56a87bcbeb5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "033972268afd719b33a7b0d0c4ee69ea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ed51159afa8c766a3fc9a5632680060b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "83233d1114a93cc13911a0d8c67dd07d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "5a2280321bdb6cd01a8d0b1809a32c46" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "adcadc9e1b5aa18901949119936e60c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0367e10b699e7d7629f0ca112235a940" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b43ae0388648936b710472b2a9e3e431" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "8f4e4e60c9df90e548a0520c047af261" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 184549376, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.moe_gate_up_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2816, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 184549376, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aeddb21991e6573caede2d7c68185015" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.moe_gate_up_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2816, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ec689d3391d173aace5532bf59515e50" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 92274688, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.moe_down_proj.q_weight", |
|
"shape": [ |
|
64, |
|
2048, |
|
176 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 92274688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f88861ef7b7936346d543a2d69b50ac8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29279232, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.q_proj.q_weight", |
|
"shape": [ |
|
3072, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 11542528 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.q_proj.q_scale", |
|
"shape": [ |
|
3072, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 393216, |
|
"byteOffset": 14688256 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_a_proj_with_mqa.q_weight", |
|
"shape": [ |
|
576, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 589824, |
|
"byteOffset": 15081472 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_a_proj_with_mqa.q_scale", |
|
"shape": [ |
|
576, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73728, |
|
"byteOffset": 15671296 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_a_layernorm.weight", |
|
"shape": [ |
|
512 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1024, |
|
"byteOffset": 15745024 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_b_proj.q_weight", |
|
"shape": [ |
|
4096, |
|
64 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 15746048 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.kv_b_proj.q_scale", |
|
"shape": [ |
|
4096, |
|
16 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072, |
|
"byteOffset": 16794624 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 16925696 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19022848 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate.weight", |
|
"shape": [ |
|
64, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 19284992 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.shared_experts.gate_up_proj.q_weight", |
|
"shape": [ |
|
5632, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5767168, |
|
"byteOffset": 19547136 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.shared_experts.gate_up_proj.q_scale", |
|
"shape": [ |
|
5632, |
|
64 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 720896, |
|
"byteOffset": 25314304 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.shared_experts.down_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
352 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2883584, |
|
"byteOffset": 26035200 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.shared_experts.down_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
88 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 360448, |
|
"byteOffset": 28918784 |
|
} |
|
], |
|
"md5sum": "f7a67a47df2a804e3296f0e30e5ff7a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 11542528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.moe_down_proj.q_scale", |
|
"shape": [ |
|
64, |
|
2048, |
|
44 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11534336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11534336 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11538432 |
|
} |
|
], |
|
"md5sum": "b08e08a725e528c81d407433a0ada2a0" |
|
} |
|
] |
|
} |