mengshyu's picture
Initial commit
0371e08 verified
{
"metadata": {
"ParamSize": 323,
"ParamBytes": 2432833536.0,
"BitsPerParam": 5.093499892671169
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 197001216,
"records": [
{
"name": "lm_head.weight",
"shape": [
32064,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 197001216,
"byteOffset": 0
}
],
"md5sum": "48a04baa06c8140d1ae4fd61ea2a51b1"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.21.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "dcfbe47d5ab4a791d37064b1b492d219"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33245184,
"records": [
{
"name": "transformer.h.21.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 0
},
{
"name": "transformer.h.21.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 6144
},
{
"name": "transformer.h.21.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12589056
},
{
"name": "transformer.h.21.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14161920
},
{
"name": "transformer.h.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17307648
},
{
"name": "transformer.h.21.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 17313792
},
{
"name": "transformer.h.21.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 31469568
},
{
"name": "transformer.h.22.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33239040
}
],
"md5sum": "6dcf6c2006772a211b2341391225c689"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.22.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "72e35ca28af9a2e49fba33093c3a052e"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.22.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.22.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.22.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.22.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.22.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "e5f37a48d07f44d89146e53d33ac2785"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.23.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "15ccafdc0e6c51f645f8499c64a2a40c"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.22.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.22.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.23.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.23.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.23.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.23.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "0e7426cb2a95eaad4c26dc2e9ef5effa"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.23.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.23.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.23.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.23.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.24.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "12e0572957c39cd0092357d592cc769d"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.24.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3b24af06d1c5c7af0fdbe7f5ed5beba6"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.24.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.24.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.24.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.24.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.24.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "423c2a44c5920b0a768287529e613f6f"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.25.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f5316244f2e0d55f365571c94e672532"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.24.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.24.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.25.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.25.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.25.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.25.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "8e12fcac3fc2adb9c57692301a5c999f"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.25.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.25.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.25.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.25.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.26.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "35cfa3831e71eb73dbb8e0cd66608592"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.26.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ed8e3706357e66a58672c0b502a4fff5"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.26.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.26.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.26.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.26.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.26.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "dddc84753a66a899e1a661808d149fe2"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.27.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e9629dc69301f7c59a4e63459ee3e8fb"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.26.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.26.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.27.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.27.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.27.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.27.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "5432a1d146c5e1e0b3247981c5f913af"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.27.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.27.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.27.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.27.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.28.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "320a114b8b64516fb3249460afede049"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.28.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fc72c34f3cfc765b6375dbcca535b96a"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.28.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.28.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.28.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.28.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.28.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.28.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "9ab22e5d60c7435b68da7f03e7106cad"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.29.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c0ff2e355a82472b8974370bd3e7fd4c"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.28.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.28.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.29.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.29.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.29.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.29.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.29.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "fb8f9797f068c0298828ed9c2d4a4566"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.29.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.29.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.29.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.29.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.30.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "a5294071aa21f04e85e1a0c59bc74525"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.30.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7b31d254c9c21e53bbc3bea5bf51f33c"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.30.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.30.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.30.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.30.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.30.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.30.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "c83efc89a07f98dc9b72518abb8043c3"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.31.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bd2231f6cf51bbe192323ce8db8cea90"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.30.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.30.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.31.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.31.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.31.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.31.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.31.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "9919fa8b9fdca476fbd5dd8a47818d98"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 197001216,
"records": [
{
"name": "transformer.embd.weight",
"shape": [
32064,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 197001216,
"byteOffset": 0
}
],
"md5sum": "2794efed4f90da2679835636c49453c1"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 21245952,
"records": [
{
"name": "transformer.h.31.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.31.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.31.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.31.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.norm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
},
{
"name": "transformer.h.0.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21239808
}
],
"md5sum": "eae85a6f6d13bbc2b0ccd595ed5eff2b"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.0.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e6eb0123a3c85e5bbca235dbd8c5c3fa"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.0.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.0.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.0.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.0.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.0.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "ce2c5c9ccc0d61c0cd6f9283f12bfbf9"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.1.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "21439421bd3a84ee0beb817de25e60e0"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.0.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.0.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.1.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.1.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.1.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.1.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "382eb5b3039bc029ccae6d0f8875ac13"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.1.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.1.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.1.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.1.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.10.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "507763da4121a37358b02ef8b18b9013"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.10.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8c12101248fda4e3575ffdbe95334d49"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.10.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.10.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.10.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.10.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.10.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "c92ddb01bfe30c9b0f13e91527f1f320"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.11.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3caf1326ddab0ec51a7d27a20ac4d113"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.10.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.10.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.11.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.11.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.11.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.11.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "f18d75cced2934cd19cba99a55d38340"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.11.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.11.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.11.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.11.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.12.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "1b359d553060ed2e94b564a04a64a73f"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.12.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5f518c319eb30d0b7fb218fe2652e83b"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.12.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.12.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.12.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.12.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.12.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "ea36d76fc29032d183fbfc6a5cb45c5c"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.13.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6feb028378bef3fd41dba769f9547e83"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.12.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.12.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.13.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.13.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.13.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.13.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "e6944b7f11f1e1b74ca7f3f8b2e7ef3a"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.13.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.13.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.13.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.13.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.14.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "9f62561220f2a9b7b02b0c96f387dd33"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.14.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b5843fae531488613b8b83b9d122fa8d"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.14.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.14.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.14.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.14.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.14.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "02076b9ed0382e21397b7e4aabce1b63"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.15.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b0c106c1413f808278008ac2e55b2193"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.14.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.14.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.15.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.15.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.15.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.15.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "38c80ca934aea3e3f7162e1a3ea4e7c7"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.15.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.15.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.15.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.15.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.16.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "16213b13e67d45074500013777bcda51"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.16.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6467bad8b9e96358c8293ffbd53c7f5b"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.16.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.16.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.16.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.16.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.16.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "1ec08f4ea08ec22f933fc312ce32e6dd"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.17.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "dd492bf7ef9298f3063c8ce818a6cf6a"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.16.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.16.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.17.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.17.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.17.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.17.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "3af8f49cb9236613bdba2e563b9991b1"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.17.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.17.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.17.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.17.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.18.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "83b443d2d6b395e347f13f5c9e4a2281"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.18.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "155490cc705758fd968a474072ec12f9"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.18.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.18.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.18.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.18.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.18.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "315ed2cbdf6b372667af8884dac50a43"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.19.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c28a82094c928e15f20a7773cfd321fe"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.18.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.18.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.19.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.19.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.19.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.19.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "b58bd806db63e42d852610fc40fe546d"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.19.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.19.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.19.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.19.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.2.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "fdbc32e3ff969e6958ab52d646cab54e"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.2.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e865bf4acc25af42e4c8537a75db6ea9"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.2.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.2.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.2.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.2.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.2.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "f686421b2d21866225dd60e6a83ebb73"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.20.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "48c8ef3ca65f1149c624e1666a58e6d3"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.2.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.2.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.20.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.20.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.20.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.20.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "798cde9e77de2c977227d40956c56835"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 26548224,
"records": [
{
"name": "transformer.h.20.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.20.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.20.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.20.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.21.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 21233664
},
{
"name": "transformer.h.21.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 25952256
},
{
"name": "transformer.h.3.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 26542080
}
],
"md5sum": "6b88e2ef1cb0aedd434a3dd4dcac45de"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.3.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c2a9b1978ca3c48e6df9fc6a287f2bcc"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.3.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.3.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.3.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.3.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.3.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "c80ef431ed511eb6fc50b80ec6ece937"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.4.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3c6cf859ae353724a10a6f24eabac1cb"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.3.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.3.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.4.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.4.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.4.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.4.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "097a950ce17f4a71a034a0a96b447f82"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.4.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.4.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.4.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.4.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.5.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "a426996d3377030d8c560eda9c288b38"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.5.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b766a0e3d97822af59648cf1aea2e28c"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.5.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.5.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.5.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.5.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.5.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "c8e98a45f4b4aa2c064cf111d7fc5f50"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.6.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "704e3af561a45f57e4cf784ddbfcc911"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.5.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.5.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.6.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.6.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.6.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.6.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "9948e767ec80afd174e37b74b338cecd"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.6.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.6.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.6.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.6.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.7.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "50751cd3e3a9029cda47981a0eecef24"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.7.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "888afbe3b8be8cea15694ce40396c697"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.7.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.7.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.7.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.7.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.7.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "7c7c7525455103bd73aadb9dc6ab497b"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.8.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a01583bddb2fe960dd515c6999e2eaaa"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "transformer.h.7.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.7.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "transformer.h.8.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 15925248
},
{
"name": "transformer.h.8.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 15931392
},
{
"name": "transformer.h.8.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28514304
},
{
"name": "transformer.h.8.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 30087168
},
{
"name": "transformer.h.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "61525dc85a9ee223b994f7fcbb3d603f"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 21239808,
"records": [
{
"name": "transformer.h.8.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "transformer.h.8.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 4718592
},
{
"name": "transformer.h.8.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 5308416
},
{
"name": "transformer.h.8.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 19464192
},
{
"name": "transformer.h.9.ln.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21233664
}
],
"md5sum": "7cbd0fbf591926805d0786ef1744c40d"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.h.9.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5e6f4084726dcc24f778539088738718"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 22616064,
"records": [
{
"name": "transformer.h.9.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "transformer.h.9.mlp.down_proj.q_scale",
"shape": [
3072,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "transformer.h.9.mlp.gate_up_proj.q_scale",
"shape": [
16384,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14155776
},
{
"name": "transformer.h.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17301504
},
{
"name": "transformer.h.9.mixer.out_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17307648
},
{
"name": "transformer.h.9.mixer.out_proj.q_scale",
"shape": [
3072,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 589824,
"byteOffset": 22026240
}
],
"md5sum": "43e4f58e29c6d3e88272b748ffbecda5"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 15925248,
"records": [
{
"name": "transformer.h.9.mixer.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "transformer.h.9.mixer.qkv_proj.q_scale",
"shape": [
9216,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
}
],
"md5sum": "accc477e9b197a31fdfbcb0c0d47b039"
}
]
}