riczhou's picture
Initial commit
98b9344 verified
{
"metadata": {
"ParamSize": 357,
"ParamBytes": 26272210944.0,
"BitsPerParam": 3.711629079784502
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "f084dc6992d7b9cda35b0a08e67749c5"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.30.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "6694945f217b115d9d61add41d3f749e"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "23ffbf67c0a2146702affb3d24113d88"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.30.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "898ee52b7c6ca38f3b619e44e7e5e783"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "cd8b58ce499f1fd9728e66ec590c4766"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.31.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "213ac0cfa38ed1da04ee95d9f96bf976"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "efbd93ca2c2915f88f717898c3924995"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.31.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a9551a4592037159776ca6cb57624ace"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1c8635fe595953945f19295c7cf942ef"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "5786b467996342a94d9a4dfffddfbc9f"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 31891456,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8192000
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8200192
},
{
"name": "model.layers.31.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 8208384
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8273920
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8282112
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 8290304
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 20873216
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 22446080
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 30834688
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 31883264
}
],
"md5sum": "ee801c40285570d08d706afa2d5d11c2"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.0.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "4a6e03aea0e7d134800cac4d000ca7e5"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "5b57c83387a1c8f0a26a0d8bed4ad7de"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.0.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "5f9636c51cbf1d44efd07a9cef29862d"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f707dea9c5dc6bf53a1a9ff44037755a"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.1.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "cde0279e39f4384e7951fab2719d028c"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "10210a47b0eee9f0da0ee73734b2ec4d"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.1.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "46dc0245928aeb0f36a8980b8c4914a2"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0df31746fd968763e17a06d24b549cdc"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 31932416,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 0
},
{
"name": "model.layers.0.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 8192000
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8257536
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8265728
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 8273920
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 20856832
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 22429696
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 30818304
},
{
"name": "model.layers.1.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 31866880
}
],
"md5sum": "a6f05ea9635e7573129066a588b81938"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.2.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "dc1034c152401224773974e9769e0d1d"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c9cb8d09cbe5e9f1ff8eb6650e847192"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.2.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "11362d56d228d3446429750145988b63"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "42f3ca73afa8531c0c02495f35dc995c"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.2.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23674880
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683072
}
],
"md5sum": "ae65d646920213464e62e24801ed9f25"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.3.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "09add6a8b89325ab9b13f3b7ab4627da"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "48da5b5fbe70bbd985f884111fd73d3e"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.3.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a0eb4a475a307c2ce86178d624856aac"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "bb462f70e6a01c0c60165aaa3856c4d7"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 23658496,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.3.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23592960
}
],
"md5sum": "5d61d616a32dff6f676b2b1c84eb6cf6"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.10.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "7c2d372ec8eaecd5c566802a9ef672b6"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6a64303b4d5aaa89617b44c2d52070a8"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.10.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "c0856ee8654b07ec5f228997c3e97fe0"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2b8a698c23b5f0d43d22eb8df8014e91"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 23658496,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.10.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23592960
}
],
"md5sum": "8efc14922491f7d8c7258150ca7b591d"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.8.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "49bbf6f3018719505dfa41105ea45d29"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "93bbaaa323aff6ba5158e58d07c1e4d5"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.8.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "d5401eccbea1cf163ee586a11221bd89"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f58fed06a3e10d6dd046e9a03b592097"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.9.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "ff69a35b0ada0865cefcdb0cd6e8fd97"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6b3d6401568d11c8ff854eb524c0222a"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.9.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "d0a467f150c85f78f34c490ff9e0d48f"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b9a025ad3ceb4aad0d4ce46f22b762a6"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.9.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23674880
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683072
}
],
"md5sum": "5a6529f827c4f0debfcd0118196576f9"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.11.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "0cd07bb73ea20a74533168b847951647"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "8cee07b4a79151dec86c749864e1240b"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.11.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "47139f9a5aef4978d87aba9ddd5e53cb"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "65e8556e6aa53cc1df59e01462d9f835"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 23674880,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.11.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
}
],
"md5sum": "173c26d0cb09387e641e6605ce039864"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.12.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "a86b34cbcac452a9c95ff906e3895317"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f556df7a75dcdb976e42a947994ce739"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.12.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "c3d35c112ac4d987d16c9430894bd3bf"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c73b4bd62c19cd7eb6fd20619ce51f0a"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.12.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23674880
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683072
}
],
"md5sum": "4421f3921a74b29ef71cc6d417ced955"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.13.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "82278f0c9cc9cddc199f9c0b4e956bbe"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "34bfdf1119e1cbe31e3f08c2e27f147f"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.13.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "1a99855adc0f327c0973930fbab872b9"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7db6259ea5a9fc64fcebc26a7d329538"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 23658496,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.13.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23592960
}
],
"md5sum": "e5c22ba257e1cce441a6190d9b7ca3b5"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.14.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "eabf51e21262988f0048b6816ee339d6"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c099d63f696cf0249f8349dc07f55dd2"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.14.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "2c4e5be5c7c6dafeb8cb6f002c90f334"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "578dc84b751843c1f6404f08a81ab1eb"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.14.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23674880
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683072
}
],
"md5sum": "421fdfa8a7fa17da2221570d3f89c7b7"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.15.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "09b4f9ca06e6ba8b0c384bea779a9606"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a1ebe677819d01ee46ee8d78a0d9169d"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.15.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "71207535df5e8cdf7ea5aa2e29132f3e"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "caf35feedbf7a979fd1cdf80f951b4ed"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 23658496,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.15.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23592960
}
],
"md5sum": "d4a9d6b303f3b523d6185dc2ad04c69c"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.16.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "fe044fade8def41c0ff0ec6393b7d528"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "99a39d409bfcd853c722503375174680"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.16.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "2723df0336f9db7863e77103f7936148"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "384f616ca117317dec7cb06d4f72a39f"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 23674880,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.16.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
}
],
"md5sum": "cb8406798ba2f5b3eb3c91091457bd4e"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.17.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "dcd989cbd29bad80b14378fccb50da24"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "aed1b1b5e8f9e4fff668d3fdceb71b25"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.17.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "40b124b535a49d199cab6511ca9d9cd1"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8f3ab9c711fe470163e72f4cd6995930"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.17.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23674880
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683072
}
],
"md5sum": "e84a9d2baef0cdbf36cb71aaea73bc56"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.18.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "3a85cefe621a0811cd4086c6fe747f58"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f7ef917f141345aadb9e806f50f1d21f"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.18.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a19b349c8997ae9973d8b35657f1446e"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "162f62f8baeddebbf6c9e65f900a88bd"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 23658496,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.18.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23592960
}
],
"md5sum": "56b7aa17f7e45bf389b5498afa892f22"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.19.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "3cbd4d179cabddef49d003ffe1bdcbac"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e420cf490657200f53745c82179caef3"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.19.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "d75ee4b674c714992e8ed8d2ec4dbae4"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "68c67d044fa3c9a4cfb5f504236ee1a9"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.19.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23674880
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683072
}
],
"md5sum": "3549d9c659990885667dc4bd609cf076"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.20.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "534092fc23d3b0ce0aee347f424d069c"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "359ecd7e5daf9e890d1ec2999506a352"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.20.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "542c327fcc15f5a85fc09182d13cd3e5"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c59445a431259d6a2416ef91d877478e"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 23658496,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.20.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23592960
}
],
"md5sum": "34afe25ef785e653a42efbbcab096d69"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.21.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "1a1efe0ef75ff5ef57b5a2aca3055912"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4326aa1a10a88b73cbedca1ea75bf4b6"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.21.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "3874d56d7d5fbd6f3cbc1330a42c7323"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "99904688765535983db02d7e52a6f44c"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.21.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23674880
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683072
}
],
"md5sum": "6a831c4fd7ccf03a7a6cdc40bd230f58"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.22.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "0bce36079d8f66187bb46541d15567d5"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1afa300343dbe4930c831fc4df68c5bc"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.22.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "d8667d386a2851b94c2411bfbe574d4b"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3d91a4ddd5b9bd277f9a9a3e4a70af80"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 23658496,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.22.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23592960
}
],
"md5sum": "c26eff0d35583beb2a36591e4008a9b7"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.23.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "5a3f11bbe4c1a4b45b795cb9e6a6cc56"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7b3feae474c2e4470285b7df8c1e7ed9"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.23.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "65607c6b8fea09bcf69885a7b1b190c1"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "50b93e2d39b3db40847825b43d70c12a"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 23674880,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.23.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
}
],
"md5sum": "d7ed2dfd80ec7f5a8426c1a82bcc4a03"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.24.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "e2cdba1e90f848b511082d2a2c302110"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "690a6c1ccdc114fbd698adcc889c4100"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.24.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4fa1948d598e121066632a72d9c1aafe"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4a5abc018a50da0e4155643bb57e96b6"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.24.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23674880
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683072
}
],
"md5sum": "e2b3413adb86e12c65526f34bca62060"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.25.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "ff700cba2578659dce26e2e49cb7e332"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "4bf529bde7c2ab1ecd7515b2a626d941"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.25.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "f41999f7613e52dfb6f1c6fd479f75df"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2ab691cec5ab9ac30e66bbc08505e5d5"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 23658496,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.25.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23592960
}
],
"md5sum": "d8129bf3eb41d835e84ae4a557db4cc3"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.26.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "f69a4cfe251d8b889f12947fe3acbd6e"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3618381a4e821b0e8664ced6c04ee53b"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.26.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a68028c14cae996cdf983f9559d616ab"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "fbd3917a31ed626effde97fce51571e1"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.26.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23674880
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683072
}
],
"md5sum": "5a17ecd6b0928f308268c7d8b27f9ff1"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.27.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "f54739797454d03e5cb827e98045b285"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1eacaf709acde4eec01909383abdb277"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.27.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "e0834c1975bc9462dd797dc9635aae55"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3a9aeb917a11ec2f9a70c677f1ace648"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 23658496,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.27.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23592960
}
],
"md5sum": "ec7f60b6073925dc0c7311890bd2e0be"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.28.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "96d1c187b6b17b6e22ecfa58b3fb8dca"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "598fe22c2571a90d800cc875ff487ca4"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.28.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "1d0c2fa805d1b9e4dff3a1eafcb84177"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d85020676dd429ebdb4c24852a863af7"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 23674880,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.28.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
}
],
"md5sum": "a55664f8e3d65f6f9240c3d7ab420bce"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.29.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "80ce0db7f60f6fd2c32188589865029e"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1e8f86847060eea69798434184e8156a"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.29.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a3585f8f6cfb4abd8702dd6f0e714c3d"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3f48cb80d5a79cba9c7fe37e25133ab5"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.29.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23674880
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683072
}
],
"md5sum": "c9fa3a0a243980e95ce21ff15a8f4655"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 23658496,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.30.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23592960
}
],
"md5sum": "275cc0234601262d511e7c088652096b"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.4.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "d24b117d5691a9f71f624f3f6c6116cd"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "bd48d3f4fc1241c1d7e115f10a47af6c"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.4.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "ae19af7b4903c6393f48d2fb5bca0d4c"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "bb0f2618aae72d82b7bf460ad02232fd"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.4.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23674880
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683072
}
],
"md5sum": "30483c7e85c8568e52a5a5ada8b72e68"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 23658496,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.5.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23592960
}
],
"md5sum": "48b203fd94b3bfa8f928d85a02189230"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.5.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "87b1d610b25a6bb06650f53406bc288c"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2f112274f5d4c5abf1890e3c89a50e94"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.5.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "af7dd569e7ebf62b0b83d29d6a7b9ccb"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "112862742cc33516ab6a6612b3e309a3"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.6.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "392dfd664889f6d079d1a58947d44474"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "cee58726a69efedfed9a3a6d234e271c"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.6.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "25eada4ef6a5da26fce63fdf7d54b3c2"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0793bc3766012e85424e0d16eff0f925"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 23674880,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.6.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
}
],
"md5sum": "978b4f47cff5f309b7b2b61c04f78958"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 469762048,
"records": [
{
"name": "model.layers.7.moe.e1_e3.q_weight",
"shape": [
8,
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 469762048,
"byteOffset": 0
}
],
"md5sum": "bf0b1e9bf0f2777eedc041e2739574be"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.moe.e1_e3.q_scale",
"shape": [
8,
28672,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "78c5318198936e3f816afabb242ee16a"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.7.moe.e2.q_weight",
"shape": [
8,
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "2bf0d254956f30039ad0ab0e2ce21261"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.moe.e2.q_scale",
"shape": [
8,
4096,
448
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "378509a1a5d221955178a17c8200735c"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 23691264,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23601152
},
{
"name": "model.layers.7.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23609344
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23674880
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683072
}
],
"md5sum": "8ddb96c3b2725d63a3d972c12e4f85b5"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 23658496,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.8.moe.gate.weight",
"shape": [
8,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 23592960
}
],
"md5sum": "109fd4f9a5a5ac44d3cd7eacb7dbc3ad"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 23592960,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
}
],
"md5sum": "d4b9e4d0210f965a364ea4778a76bb77"
}
]
}