{ "metadata": { "ParamSize": 540, "ParamBytes": 9828481024.0, "BitsPerParam": 5.006075648161553 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 104857600, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 102400, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 104857600, "byteOffset": 0 } ], "md5sum": "aecba7f129dff063694b75eb9df73aef" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 104857600, "records": [ { "name": "lm_head.q_weight", "shape": [ 102400, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 104857600, "byteOffset": 0 } ], "md5sum": "9c777fb06365103e905464b96ab47072" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 31601664, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 102400, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 13107200 }, { "name": "lm_head.q_scale", "shape": [ 102400, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 13107200, "byteOffset": 13111296 }, { "name": "model.layers.0.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 26218496 }, { "name": "model.layers.0.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 29364224 }, { "name": "model.layers.0.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29757440 }, { "name": "model.layers.0.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 30347264 }, { "name": "model.layers.0.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 30420992 }, { "name": "model.layers.0.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30422016 }, { "name": "model.layers.0.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 31470592 } ], "md5sum": "3edbb7d4fea234666177e0002d8012e1" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 27574272, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 2097152 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 21888, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22413312, "byteOffset": 2359296 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 21888, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2801664, "byteOffset": 24772608 } ], "md5sum": "5c550c39b3503f125dc0e8939f6db19e" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.1.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "2cb32a075e79c2e7977117d320c7158a" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.1.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "e119fa8ffb9e4331d8e160b7787551d9" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.1.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "130e1f34c853f056a36481565657501f" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 30352384, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 2048, 1368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 11206656, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 2048, 342 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1400832, "byteOffset": 11206656 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 12607488 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 12611584 }, { "name": "model.layers.1.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 12615680 }, { "name": "model.layers.1.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 15761408 }, { "name": "model.layers.1.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 16154624 }, { "name": "model.layers.1.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 16744448 }, { "name": "model.layers.1.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 16818176 }, { "name": "model.layers.1.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 16819200 }, { "name": "model.layers.1.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 17867776 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 17998848 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 20096000 }, { "name": "model.layers.1.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 20358144 }, { "name": "model.layers.1.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 20620288 }, { "name": "model.layers.1.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 26387456 }, { "name": "model.layers.1.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 27108352 }, { "name": "model.layers.1.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 29991936 } ], "md5sum": "b44c15db5d762e1e20c897ce21ab125c" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.2.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "bf13ebf4204d41ac2c696d2746cb8109" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.2.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "c4fe0af0dd214a94b671e4fa54c53199" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.2.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "a5858f0200fec84b0f14556c372a0380" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.1.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.2.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.2.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.2.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.2.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.2.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.2.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.2.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.2.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.2.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.2.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.2.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.2.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "5a1448ed18f2f0d9c4e15ca551cbbf8b" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.3.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "8e265f4994f7b62590c5e6f0081359cb" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.3.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "6836686ab66e573994f265475180fec2" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.3.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "1963f0f3e0b5420326fa62c238c6c937" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.2.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.3.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.3.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.3.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.3.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.3.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.3.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.3.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.3.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.3.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.3.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.3.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.3.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "04724f86db655f3a1c1205c2bd5a62cd" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.4.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "2dcb861888a6ef52f333dcee6afa955f" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.4.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "241a231e639e927c55bb30a338ecb319" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.4.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "9ee7b1730034f7d27a08ad750c633faa" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.3.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.4.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.4.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.4.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.4.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.4.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.4.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.4.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.4.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.4.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.4.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.4.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.4.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "2ef24a770fc746dceaefbefa282dc530" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.5.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "3a225c76960137d8241b870ebe4726bd" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.5.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "7808ba6cd5d693d41b62fb7db932c2be" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.5.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "5425fd0fd748c4f8899f0a4c0d02060c" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.4.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.5.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.5.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.5.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.5.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.5.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.5.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.5.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.5.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.5.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.5.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.5.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.5.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "12f2aad47ba38fea275645c8f952ca35" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.6.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "2c5200430f16add6e4d28373b8eb5042" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.6.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "7df4cef8e3c4fa2b8bc082389d0e416a" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.6.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "db0c6dcd77691f83cd499c3394c6d503" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.5.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.6.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.6.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.6.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.6.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.6.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.6.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.6.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.6.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.6.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.6.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.6.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.6.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "d55c4cc540d9aad19a052efaaa1a1de6" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.7.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "341e0ea06cbb6260bc0150a7ef58719e" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.7.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "e4105bb9a01df049f1df6f9f4b8b84e1" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.7.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "7376bb84a86d5815102a7b8c2c52e4ee" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.6.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.7.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.7.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.7.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.7.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.7.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.7.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.7.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.7.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.7.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.7.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.7.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.7.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "42739b8922ca745d33d7e07e3375d857" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.8.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "d0b521b707b9eaf7780a9b9b5864708d" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.8.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "83e8bd7d70cc18b7696745b343922231" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.8.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "0b37805532160a011505e7ac5a99b5bc" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.7.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.8.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.8.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.8.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.8.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.8.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.8.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.8.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.8.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.8.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.8.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.8.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.8.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "2cdca14a871e54eb7fecb5bba21a7bc1" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.9.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "0f513f9a0d03775b32821402934b7676" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.9.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "d3b9afb8b1f8460fe9af2f496d0cf6bb" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.9.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "5dbd8536bf1148115653b0bc436fd155" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.8.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.9.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.9.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.9.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.9.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.9.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.9.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.9.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.9.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.9.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.9.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.9.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.9.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "feecefb1c56bf343ffe3714060c27550" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.10.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "d16eaba8c42bdf5d2bfd17dd51e95659" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.10.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "dafb0ac8d6a35846a4cf4b237f71d2d6" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.10.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "9df235058e7f7d718a6eaa1db5764e53" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.9.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.10.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.10.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.10.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.10.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.10.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.10.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.10.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.10.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.10.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.10.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.10.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.10.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "40baf16d40e0fe719d6c90e13435bb56" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.11.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "4a5e3d29c874445c2b1e691799703552" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.11.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "f7d823c624b6069389338731955626d9" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.11.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "6b4e8b74259e5cfc71ec8cac9ce88e81" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.10.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.11.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.11.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.11.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.11.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.11.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.11.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.11.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.11.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.11.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.11.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.11.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.11.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "6efdb660233d84db84220a0df941207e" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.12.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "ec3c62c29a5c880dc1676b6136911932" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.12.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "f786eedd3709cb65e05567c7b1c2e968" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.12.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "93dc1b693e9442e9d1802297bc9d8080" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.11.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.12.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.12.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.12.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.12.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.12.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.12.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.12.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.12.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.12.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.12.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.12.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.12.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "f5cf59fe827ba499af3622ef68b53104" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.13.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "4c4d6cb1461facc9765a268ba04fa6ee" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.13.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "641ebb59a0d5426d628c8c9c5ca64361" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.13.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "bd6316e9a7e6f95bfa26a2d3f9ccd2e3" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.12.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.13.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.13.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.13.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.13.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.13.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.13.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.13.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.13.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.13.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.13.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.13.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.13.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "360346127e1a16e9dff6db2ae9055787" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.14.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "9a2ca6638a396fbb824388bb7a4ed793" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.14.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "b832d5df54d210efacd31fd310915b31" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.14.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "dc976e7be7cfaf6325e789a1af2ebe3e" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.13.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.14.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.14.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.14.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.14.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.14.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.14.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.14.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.14.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.14.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.14.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.14.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.14.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "683f47a8fc4093d4a015a0541b2be473" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.15.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "5c823c361d0272b251f5ffb006c6fa4a" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.15.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "1300ab997707e3bac452b549f6f1b791" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.15.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "daf7b029dac6a8ca4f625e1643d2eeed" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.14.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.15.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.15.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.15.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.15.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.15.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.15.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.15.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.15.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.15.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.15.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.15.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.15.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "7e38de5399b867c755340e40361477d8" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.16.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "7683e954027028f578b46e774f2e4e0d" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.16.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "c30b17863a3bc75384e74e9c07cd6d1d" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.16.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "7df0c8975e0966abcaec8a6d8dc3666e" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.15.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.16.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.16.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.16.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.16.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.16.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.16.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.16.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.16.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.16.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.16.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.16.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.16.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "7f16dd445dfb75203d04689a8de74733" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.17.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "72458d312469b9c35a845ad0ea760977" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.17.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "4da660ec13d5680d859634572e3a790b" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.17.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "7823fda41ec073e680b3f4a3b1d9b2bf" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.16.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.17.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.17.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.17.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.17.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.17.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.17.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.17.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.17.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.17.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.17.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.17.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.17.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "c77169eca0567ead25d2b4f807f85eb6" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.18.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "c3e81e764b5456e48a603942b80274a6" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.18.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "23411bbfabdec689846a2cd064ec08f6" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.18.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "358262ba79ac0495b92f7e6fcb5e2ae7" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.17.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.18.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.18.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.18.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.18.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.18.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.18.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.18.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.18.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.18.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.18.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.18.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.18.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "87a3f84d55f0964a343e6d255c30a29b" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.19.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "e1cc38cb3dec8a8fb8fa08af72eae37c" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.19.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "45a0f508d0bb33224b060fdde2fab9fd" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.19.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "dbbeb4832b7529767841dda7eb703467" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.18.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.19.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.19.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.19.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.19.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.19.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.19.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.19.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.19.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.19.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.19.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.19.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.19.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "cb00bc5bc69031f2baa0a818593a6308" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.20.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "89040711e6aa2b61e405e0b977f0e5c7" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.20.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "8da6d8dc34dcfc03c6c08905cfe79437" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.20.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "da5cf3a28182036c810447738a13a66b" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.19.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.20.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.20.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.20.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.20.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.20.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.20.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.20.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.20.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.20.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.20.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.20.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.20.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "bd0bc40aa2beaa94a66609ba234eb98a" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.21.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "6fe4ea05e1d514efc23566ad8897c1a4" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.21.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "edb10d3a0362259c844abd681f00b57f" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.21.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "8cf790f09998c2ee8726dadbee27776d" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.20.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.21.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.21.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.21.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.21.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.21.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.21.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.21.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.21.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.21.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.21.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.21.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.21.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "8f9356ac0ce5c7a19349c4090f59fb30" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.22.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "e68534b8ad5519b90538aa42b53c043e" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.22.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "48e8b10ac27055abfffa65dcc62dcdff" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.22.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "be93840acabfc15bed8960afd9788afb" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.21.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.22.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.22.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.22.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.22.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.22.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.22.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.22.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.22.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.22.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.22.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.22.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.22.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "79215ec8e21962243d7840ddc1ce758f" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.23.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "db7acc0bacb9969ca6b7df2b96a81188" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.23.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "ddc3538ccdf293d97478b8cf1ae44c93" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.23.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "533d2d37d864f37693c672b4e168294f" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.22.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.23.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.23.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.23.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.23.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.23.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.23.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.23.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.23.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.23.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.23.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.23.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.23.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "240d7a194858bfbcd7faa56a87bcbeb5" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.24.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "033972268afd719b33a7b0d0c4ee69ea" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.24.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "ed51159afa8c766a3fc9a5632680060b" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.24.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "83233d1114a93cc13911a0d8c67dd07d" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.23.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.24.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.24.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.24.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.24.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.24.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.24.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.24.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.24.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.24.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.24.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.24.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.24.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "5a2280321bdb6cd01a8d0b1809a32c46" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.25.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "adcadc9e1b5aa18901949119936e60c8" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.25.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "0367e10b699e7d7629f0ca112235a940" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.25.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "b43ae0388648936b710472b2a9e3e431" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.24.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.25.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.25.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.25.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.25.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.25.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.25.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.25.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.25.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.25.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.25.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.25.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.25.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "8f4e4e60c9df90e548a0520c047af261" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 184549376, "records": [ { "name": "model.layers.26.mlp.moe_gate_up_proj.q_weight", "shape": [ 64, 2816, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 184549376, "byteOffset": 0 } ], "md5sum": "aeddb21991e6573caede2d7c68185015" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.26.mlp.moe_gate_up_proj.q_scale", "shape": [ 64, 2816, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "ec689d3391d173aace5532bf59515e50" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 92274688, "records": [ { "name": "model.layers.26.mlp.moe_down_proj.q_weight", "shape": [ 64, 2048, 176 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 92274688, "byteOffset": 0 } ], "md5sum": "f88861ef7b7936346d543a2d69b50ac8" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 29279232, "records": [ { "name": "model.layers.25.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 }, { "name": "model.layers.26.self_attn.q_proj.q_weight", "shape": [ 3072, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11542528 }, { "name": "model.layers.26.self_attn.q_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 393216, "byteOffset": 14688256 }, { "name": "model.layers.26.self_attn.kv_a_proj_with_mqa.q_weight", "shape": [ 576, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 15081472 }, { "name": "model.layers.26.self_attn.kv_a_proj_with_mqa.q_scale", "shape": [ 576, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 73728, "byteOffset": 15671296 }, { "name": "model.layers.26.self_attn.kv_a_layernorm.weight", "shape": [ 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1024, "byteOffset": 15745024 }, { "name": "model.layers.26.self_attn.kv_b_proj.q_weight", "shape": [ 4096, 64 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15746048 }, { "name": "model.layers.26.self_attn.kv_b_proj.q_scale", "shape": [ 4096, 16 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131072, "byteOffset": 16794624 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 2048, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 16925696 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 2048, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19022848 }, { "name": "model.layers.26.mlp.gate.weight", "shape": [ 64, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 262144, "byteOffset": 19284992 }, { "name": "model.layers.26.mlp.shared_experts.gate_up_proj.q_weight", "shape": [ 5632, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 5767168, "byteOffset": 19547136 }, { "name": "model.layers.26.mlp.shared_experts.gate_up_proj.q_scale", "shape": [ 5632, 64 ], "dtype": "bfloat16", "format": "raw", "nbytes": 720896, "byteOffset": 25314304 }, { "name": "model.layers.26.mlp.shared_experts.down_proj.q_weight", "shape": [ 2048, 352 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2883584, "byteOffset": 26035200 }, { "name": "model.layers.26.mlp.shared_experts.down_proj.q_scale", "shape": [ 2048, 88 ], "dtype": "bfloat16", "format": "raw", "nbytes": 360448, "byteOffset": 28918784 } ], "md5sum": "f7a67a47df2a804e3296f0e30e5ff7a6" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 11542528, "records": [ { "name": "model.layers.26.mlp.moe_down_proj.q_scale", "shape": [ 64, 2048, 44 ], "dtype": "bfloat16", "format": "raw", "nbytes": 11534336, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11534336 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 11538432 } ], "md5sum": "b08e08a725e528c81d407433a0ada2a0" } ] }