junrushao's picture
Initial commit
c6ee1b1
{
"metadata": {
"ParamSize": 392,
"ParamBytes": 558430208.0,
"BitsPerParam": 3.6444754828825427
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 41000960,
"records": [
{
"name": "transformer.wte.q_weight",
"shape": [
49280,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41000960,
"byteOffset": 0
}
],
"md5sum": "606523ae3635513dfd6f17ee8295e81e"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 30484480,
"records": [
{
"name": "transformer.wte.q_scale",
"shape": [
49280,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5125120,
"byteOffset": 0
},
{
"name": "transformer.wpe.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 5125120
},
{
"name": "transformer.wpe.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 6829056
},
{
"name": "transformer.h.0.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7042048
},
{
"name": "transformer.h.0.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7046144
},
{
"name": "transformer.h.0.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 7050240
},
{
"name": "transformer.h.0.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 8967168
},
{
"name": "transformer.h.0.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 9206784
},
{
"name": "transformer.h.0.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 9211392
},
{
"name": "transformer.h.0.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 10915328
},
{
"name": "transformer.h.0.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11128320
},
{
"name": "transformer.h.0.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11132416
},
{
"name": "transformer.h.0.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11136512
},
{
"name": "transformer.h.0.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 11140608
},
{
"name": "transformer.h.0.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 17956352
},
{
"name": "transformer.h.0.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 18808320
},
{
"name": "transformer.h.0.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 18824704
},
{
"name": "transformer.h.0.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 25542144
},
{
"name": "transformer.h.0.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26381824
},
{
"name": "transformer.h.1.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26385920
},
{
"name": "transformer.h.1.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26390016
},
{
"name": "transformer.h.1.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 26394112
},
{
"name": "transformer.h.1.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 28311040
},
{
"name": "transformer.h.1.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 28550656
},
{
"name": "transformer.h.1.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 28555264
},
{
"name": "transformer.h.1.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 30259200
},
{
"name": "transformer.h.1.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30472192
},
{
"name": "transformer.h.1.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30476288
},
{
"name": "transformer.h.1.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30480384
}
],
"md5sum": "8ce048787f9afbc33ba261c2ad6151c2"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 27027968,
"records": [
{
"name": "transformer.h.1.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 0
},
{
"name": "transformer.h.1.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 6815744
},
{
"name": "transformer.h.1.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 7667712
},
{
"name": "transformer.h.1.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 7684096
},
{
"name": "transformer.h.1.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 14401536
},
{
"name": "transformer.h.1.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15241216
},
{
"name": "transformer.h.2.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15245312
},
{
"name": "transformer.h.2.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15249408
},
{
"name": "transformer.h.2.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 15253504
},
{
"name": "transformer.h.2.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 17170432
},
{
"name": "transformer.h.2.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 17410048
},
{
"name": "transformer.h.2.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 17414656
},
{
"name": "transformer.h.2.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 19118592
},
{
"name": "transformer.h.2.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19331584
},
{
"name": "transformer.h.2.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19335680
},
{
"name": "transformer.h.2.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19339776
},
{
"name": "transformer.h.2.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 19343872
},
{
"name": "transformer.h.2.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 26159616
},
{
"name": "transformer.h.2.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27011584
}
],
"md5sum": "1ab45180b7a59d08ff8d26dc9ffd8051"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 31003648,
"records": [
{
"name": "transformer.h.2.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 0
},
{
"name": "transformer.h.2.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 6717440
},
{
"name": "transformer.h.2.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7557120
},
{
"name": "transformer.h.3.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7561216
},
{
"name": "transformer.h.3.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7565312
},
{
"name": "transformer.h.3.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 7569408
},
{
"name": "transformer.h.3.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 9486336
},
{
"name": "transformer.h.3.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 9725952
},
{
"name": "transformer.h.3.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 9730560
},
{
"name": "transformer.h.3.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 11434496
},
{
"name": "transformer.h.3.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11647488
},
{
"name": "transformer.h.3.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11651584
},
{
"name": "transformer.h.3.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11655680
},
{
"name": "transformer.h.3.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 11659776
},
{
"name": "transformer.h.3.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 18475520
},
{
"name": "transformer.h.3.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 19327488
},
{
"name": "transformer.h.3.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 19343872
},
{
"name": "transformer.h.3.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 26061312
},
{
"name": "transformer.h.3.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26900992
},
{
"name": "transformer.h.4.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26905088
},
{
"name": "transformer.h.4.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26909184
},
{
"name": "transformer.h.4.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 26913280
},
{
"name": "transformer.h.4.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 28830208
},
{
"name": "transformer.h.4.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 29069824
},
{
"name": "transformer.h.4.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 29074432
},
{
"name": "transformer.h.4.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 30778368
},
{
"name": "transformer.h.4.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30991360
},
{
"name": "transformer.h.4.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30995456
},
{
"name": "transformer.h.4.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30999552
}
],
"md5sum": "8e07bc57bf1f1940f02ec372702969af"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 27027968,
"records": [
{
"name": "transformer.h.4.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 0
},
{
"name": "transformer.h.4.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 6815744
},
{
"name": "transformer.h.4.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 7667712
},
{
"name": "transformer.h.4.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 7684096
},
{
"name": "transformer.h.4.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 14401536
},
{
"name": "transformer.h.4.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15241216
},
{
"name": "transformer.h.5.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15245312
},
{
"name": "transformer.h.5.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15249408
},
{
"name": "transformer.h.5.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 15253504
},
{
"name": "transformer.h.5.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 17170432
},
{
"name": "transformer.h.5.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 17410048
},
{
"name": "transformer.h.5.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 17414656
},
{
"name": "transformer.h.5.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 19118592
},
{
"name": "transformer.h.5.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19331584
},
{
"name": "transformer.h.5.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19335680
},
{
"name": "transformer.h.5.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19339776
},
{
"name": "transformer.h.5.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 19343872
},
{
"name": "transformer.h.5.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 26159616
},
{
"name": "transformer.h.5.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27011584
}
],
"md5sum": "6a28592dd26102827fa7a616df3397d5"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 31003648,
"records": [
{
"name": "transformer.h.5.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 0
},
{
"name": "transformer.h.5.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 6717440
},
{
"name": "transformer.h.5.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7557120
},
{
"name": "transformer.h.6.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7561216
},
{
"name": "transformer.h.6.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7565312
},
{
"name": "transformer.h.6.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 7569408
},
{
"name": "transformer.h.6.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 9486336
},
{
"name": "transformer.h.6.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 9725952
},
{
"name": "transformer.h.6.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 9730560
},
{
"name": "transformer.h.6.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 11434496
},
{
"name": "transformer.h.6.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11647488
},
{
"name": "transformer.h.6.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11651584
},
{
"name": "transformer.h.6.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11655680
},
{
"name": "transformer.h.6.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 11659776
},
{
"name": "transformer.h.6.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 18475520
},
{
"name": "transformer.h.6.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 19327488
},
{
"name": "transformer.h.6.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 19343872
},
{
"name": "transformer.h.6.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 26061312
},
{
"name": "transformer.h.6.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26900992
},
{
"name": "transformer.h.7.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26905088
},
{
"name": "transformer.h.7.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26909184
},
{
"name": "transformer.h.7.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 26913280
},
{
"name": "transformer.h.7.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 28830208
},
{
"name": "transformer.h.7.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 29069824
},
{
"name": "transformer.h.7.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 29074432
},
{
"name": "transformer.h.7.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 30778368
},
{
"name": "transformer.h.7.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30991360
},
{
"name": "transformer.h.7.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30995456
},
{
"name": "transformer.h.7.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30999552
}
],
"md5sum": "f06f7a7f3d2f73a2a273265f78ba6843"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 27027968,
"records": [
{
"name": "transformer.h.7.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 0
},
{
"name": "transformer.h.7.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 6815744
},
{
"name": "transformer.h.7.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 7667712
},
{
"name": "transformer.h.7.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 7684096
},
{
"name": "transformer.h.7.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 14401536
},
{
"name": "transformer.h.7.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15241216
},
{
"name": "transformer.h.8.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15245312
},
{
"name": "transformer.h.8.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15249408
},
{
"name": "transformer.h.8.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 15253504
},
{
"name": "transformer.h.8.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 17170432
},
{
"name": "transformer.h.8.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 17410048
},
{
"name": "transformer.h.8.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 17414656
},
{
"name": "transformer.h.8.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 19118592
},
{
"name": "transformer.h.8.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19331584
},
{
"name": "transformer.h.8.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19335680
},
{
"name": "transformer.h.8.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19339776
},
{
"name": "transformer.h.8.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 19343872
},
{
"name": "transformer.h.8.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 26159616
},
{
"name": "transformer.h.8.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27011584
}
],
"md5sum": "a1951a7ba32dc40bb55c0877f64ea66a"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 31003648,
"records": [
{
"name": "transformer.h.8.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 0
},
{
"name": "transformer.h.8.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 6717440
},
{
"name": "transformer.h.8.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7557120
},
{
"name": "transformer.h.9.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7561216
},
{
"name": "transformer.h.9.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7565312
},
{
"name": "transformer.h.9.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 7569408
},
{
"name": "transformer.h.9.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 9486336
},
{
"name": "transformer.h.9.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 9725952
},
{
"name": "transformer.h.9.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 9730560
},
{
"name": "transformer.h.9.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 11434496
},
{
"name": "transformer.h.9.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11647488
},
{
"name": "transformer.h.9.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11651584
},
{
"name": "transformer.h.9.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11655680
},
{
"name": "transformer.h.9.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 11659776
},
{
"name": "transformer.h.9.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 18475520
},
{
"name": "transformer.h.9.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 19327488
},
{
"name": "transformer.h.9.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 19343872
},
{
"name": "transformer.h.9.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 26061312
},
{
"name": "transformer.h.9.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26900992
},
{
"name": "transformer.h.10.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26905088
},
{
"name": "transformer.h.10.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26909184
},
{
"name": "transformer.h.10.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 26913280
},
{
"name": "transformer.h.10.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 28830208
},
{
"name": "transformer.h.10.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 29069824
},
{
"name": "transformer.h.10.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 29074432
},
{
"name": "transformer.h.10.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 30778368
},
{
"name": "transformer.h.10.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30991360
},
{
"name": "transformer.h.10.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30995456
},
{
"name": "transformer.h.10.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30999552
}
],
"md5sum": "f65c7d31ab02fa807d10b4019146e248"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 27027968,
"records": [
{
"name": "transformer.h.10.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 0
},
{
"name": "transformer.h.10.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 6815744
},
{
"name": "transformer.h.10.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 7667712
},
{
"name": "transformer.h.10.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 7684096
},
{
"name": "transformer.h.10.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 14401536
},
{
"name": "transformer.h.10.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15241216
},
{
"name": "transformer.h.11.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15245312
},
{
"name": "transformer.h.11.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15249408
},
{
"name": "transformer.h.11.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 15253504
},
{
"name": "transformer.h.11.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 17170432
},
{
"name": "transformer.h.11.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 17410048
},
{
"name": "transformer.h.11.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 17414656
},
{
"name": "transformer.h.11.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 19118592
},
{
"name": "transformer.h.11.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19331584
},
{
"name": "transformer.h.11.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19335680
},
{
"name": "transformer.h.11.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19339776
},
{
"name": "transformer.h.11.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 19343872
},
{
"name": "transformer.h.11.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 26159616
},
{
"name": "transformer.h.11.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27011584
}
],
"md5sum": "b212963b5c82c5c3c02cf96d37508fd2"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 31003648,
"records": [
{
"name": "transformer.h.11.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 0
},
{
"name": "transformer.h.11.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 6717440
},
{
"name": "transformer.h.11.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7557120
},
{
"name": "transformer.h.12.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7561216
},
{
"name": "transformer.h.12.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7565312
},
{
"name": "transformer.h.12.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 7569408
},
{
"name": "transformer.h.12.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 9486336
},
{
"name": "transformer.h.12.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 9725952
},
{
"name": "transformer.h.12.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 9730560
},
{
"name": "transformer.h.12.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 11434496
},
{
"name": "transformer.h.12.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11647488
},
{
"name": "transformer.h.12.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11651584
},
{
"name": "transformer.h.12.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11655680
},
{
"name": "transformer.h.12.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 11659776
},
{
"name": "transformer.h.12.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 18475520
},
{
"name": "transformer.h.12.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 19327488
},
{
"name": "transformer.h.12.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 19343872
},
{
"name": "transformer.h.12.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 26061312
},
{
"name": "transformer.h.12.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26900992
},
{
"name": "transformer.h.13.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26905088
},
{
"name": "transformer.h.13.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26909184
},
{
"name": "transformer.h.13.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 26913280
},
{
"name": "transformer.h.13.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 28830208
},
{
"name": "transformer.h.13.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 29069824
},
{
"name": "transformer.h.13.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 29074432
},
{
"name": "transformer.h.13.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 30778368
},
{
"name": "transformer.h.13.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30991360
},
{
"name": "transformer.h.13.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30995456
},
{
"name": "transformer.h.13.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30999552
}
],
"md5sum": "a0675f5284e954f8974036cdad0049fb"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 27027968,
"records": [
{
"name": "transformer.h.13.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 0
},
{
"name": "transformer.h.13.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 6815744
},
{
"name": "transformer.h.13.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 7667712
},
{
"name": "transformer.h.13.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 7684096
},
{
"name": "transformer.h.13.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 14401536
},
{
"name": "transformer.h.13.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15241216
},
{
"name": "transformer.h.14.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15245312
},
{
"name": "transformer.h.14.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15249408
},
{
"name": "transformer.h.14.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 15253504
},
{
"name": "transformer.h.14.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 17170432
},
{
"name": "transformer.h.14.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 17410048
},
{
"name": "transformer.h.14.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 17414656
},
{
"name": "transformer.h.14.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 19118592
},
{
"name": "transformer.h.14.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19331584
},
{
"name": "transformer.h.14.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19335680
},
{
"name": "transformer.h.14.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19339776
},
{
"name": "transformer.h.14.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 19343872
},
{
"name": "transformer.h.14.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 26159616
},
{
"name": "transformer.h.14.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27011584
}
],
"md5sum": "cbe10c01e3fc3e1827997af59d861869"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 31003648,
"records": [
{
"name": "transformer.h.14.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 0
},
{
"name": "transformer.h.14.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 6717440
},
{
"name": "transformer.h.14.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7557120
},
{
"name": "transformer.h.15.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7561216
},
{
"name": "transformer.h.15.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7565312
},
{
"name": "transformer.h.15.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 7569408
},
{
"name": "transformer.h.15.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 9486336
},
{
"name": "transformer.h.15.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 9725952
},
{
"name": "transformer.h.15.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 9730560
},
{
"name": "transformer.h.15.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 11434496
},
{
"name": "transformer.h.15.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11647488
},
{
"name": "transformer.h.15.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11651584
},
{
"name": "transformer.h.15.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11655680
},
{
"name": "transformer.h.15.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 11659776
},
{
"name": "transformer.h.15.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 18475520
},
{
"name": "transformer.h.15.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 19327488
},
{
"name": "transformer.h.15.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 19343872
},
{
"name": "transformer.h.15.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 26061312
},
{
"name": "transformer.h.15.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26900992
},
{
"name": "transformer.h.16.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26905088
},
{
"name": "transformer.h.16.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26909184
},
{
"name": "transformer.h.16.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 26913280
},
{
"name": "transformer.h.16.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 28830208
},
{
"name": "transformer.h.16.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 29069824
},
{
"name": "transformer.h.16.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 29074432
},
{
"name": "transformer.h.16.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 30778368
},
{
"name": "transformer.h.16.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30991360
},
{
"name": "transformer.h.16.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30995456
},
{
"name": "transformer.h.16.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30999552
}
],
"md5sum": "0dae6e43d6550288de886eceb7dfb53e"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 27027968,
"records": [
{
"name": "transformer.h.16.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 0
},
{
"name": "transformer.h.16.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 6815744
},
{
"name": "transformer.h.16.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 7667712
},
{
"name": "transformer.h.16.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 7684096
},
{
"name": "transformer.h.16.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 14401536
},
{
"name": "transformer.h.16.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15241216
},
{
"name": "transformer.h.17.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15245312
},
{
"name": "transformer.h.17.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15249408
},
{
"name": "transformer.h.17.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 15253504
},
{
"name": "transformer.h.17.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 17170432
},
{
"name": "transformer.h.17.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 17410048
},
{
"name": "transformer.h.17.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 17414656
},
{
"name": "transformer.h.17.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 19118592
},
{
"name": "transformer.h.17.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19331584
},
{
"name": "transformer.h.17.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19335680
},
{
"name": "transformer.h.17.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19339776
},
{
"name": "transformer.h.17.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 19343872
},
{
"name": "transformer.h.17.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 26159616
},
{
"name": "transformer.h.17.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27011584
}
],
"md5sum": "4b15f38ee7b07220edc1330d5a1ac1dc"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 31003648,
"records": [
{
"name": "transformer.h.17.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 0
},
{
"name": "transformer.h.17.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 6717440
},
{
"name": "transformer.h.17.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7557120
},
{
"name": "transformer.h.18.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7561216
},
{
"name": "transformer.h.18.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7565312
},
{
"name": "transformer.h.18.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 7569408
},
{
"name": "transformer.h.18.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 9486336
},
{
"name": "transformer.h.18.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 9725952
},
{
"name": "transformer.h.18.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 9730560
},
{
"name": "transformer.h.18.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 11434496
},
{
"name": "transformer.h.18.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11647488
},
{
"name": "transformer.h.18.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11651584
},
{
"name": "transformer.h.18.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11655680
},
{
"name": "transformer.h.18.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 11659776
},
{
"name": "transformer.h.18.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 18475520
},
{
"name": "transformer.h.18.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 19327488
},
{
"name": "transformer.h.18.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 19343872
},
{
"name": "transformer.h.18.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 26061312
},
{
"name": "transformer.h.18.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26900992
},
{
"name": "transformer.h.19.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26905088
},
{
"name": "transformer.h.19.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26909184
},
{
"name": "transformer.h.19.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 26913280
},
{
"name": "transformer.h.19.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 28830208
},
{
"name": "transformer.h.19.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 29069824
},
{
"name": "transformer.h.19.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 29074432
},
{
"name": "transformer.h.19.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 30778368
},
{
"name": "transformer.h.19.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30991360
},
{
"name": "transformer.h.19.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30995456
},
{
"name": "transformer.h.19.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30999552
}
],
"md5sum": "8e1f308884d460271e4a169ee5902ba6"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 27027968,
"records": [
{
"name": "transformer.h.19.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 0
},
{
"name": "transformer.h.19.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 6815744
},
{
"name": "transformer.h.19.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 7667712
},
{
"name": "transformer.h.19.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 7684096
},
{
"name": "transformer.h.19.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 14401536
},
{
"name": "transformer.h.19.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15241216
},
{
"name": "transformer.h.20.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15245312
},
{
"name": "transformer.h.20.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15249408
},
{
"name": "transformer.h.20.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 15253504
},
{
"name": "transformer.h.20.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 17170432
},
{
"name": "transformer.h.20.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 17410048
},
{
"name": "transformer.h.20.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 17414656
},
{
"name": "transformer.h.20.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 19118592
},
{
"name": "transformer.h.20.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19331584
},
{
"name": "transformer.h.20.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19335680
},
{
"name": "transformer.h.20.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19339776
},
{
"name": "transformer.h.20.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 19343872
},
{
"name": "transformer.h.20.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 26159616
},
{
"name": "transformer.h.20.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27011584
}
],
"md5sum": "cddf375922759cb0e1ce296fc360285b"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 31003648,
"records": [
{
"name": "transformer.h.20.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 0
},
{
"name": "transformer.h.20.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 6717440
},
{
"name": "transformer.h.20.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7557120
},
{
"name": "transformer.h.21.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7561216
},
{
"name": "transformer.h.21.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7565312
},
{
"name": "transformer.h.21.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 7569408
},
{
"name": "transformer.h.21.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 9486336
},
{
"name": "transformer.h.21.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 9725952
},
{
"name": "transformer.h.21.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 9730560
},
{
"name": "transformer.h.21.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 11434496
},
{
"name": "transformer.h.21.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11647488
},
{
"name": "transformer.h.21.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11651584
},
{
"name": "transformer.h.21.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11655680
},
{
"name": "transformer.h.21.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 11659776
},
{
"name": "transformer.h.21.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 18475520
},
{
"name": "transformer.h.21.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 19327488
},
{
"name": "transformer.h.21.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 19343872
},
{
"name": "transformer.h.21.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 26061312
},
{
"name": "transformer.h.21.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26900992
},
{
"name": "transformer.h.22.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26905088
},
{
"name": "transformer.h.22.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 26909184
},
{
"name": "transformer.h.22.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 26913280
},
{
"name": "transformer.h.22.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 28830208
},
{
"name": "transformer.h.22.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 29069824
},
{
"name": "transformer.h.22.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 29074432
},
{
"name": "transformer.h.22.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 30778368
},
{
"name": "transformer.h.22.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30991360
},
{
"name": "transformer.h.22.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30995456
},
{
"name": "transformer.h.22.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 30999552
}
],
"md5sum": "4b195d379c0b42d77e987ed6e53bf7a6"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 27027968,
"records": [
{
"name": "transformer.h.22.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 0
},
{
"name": "transformer.h.22.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 6815744
},
{
"name": "transformer.h.22.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 7667712
},
{
"name": "transformer.h.22.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 7684096
},
{
"name": "transformer.h.22.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 14401536
},
{
"name": "transformer.h.22.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15241216
},
{
"name": "transformer.h.23.ln_1.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15245312
},
{
"name": "transformer.h.23.ln_1.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 15249408
},
{
"name": "transformer.h.23.attn.c_attn.q_weight",
"shape": [
2304,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1916928,
"byteOffset": 15253504
},
{
"name": "transformer.h.23.attn.c_attn.q_scale",
"shape": [
2304,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 239616,
"byteOffset": 17170432
},
{
"name": "transformer.h.23.attn.c_attn.bias",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 17410048
},
{
"name": "transformer.h.23.attn.c_proj.q_weight",
"shape": [
2048,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1703936,
"byteOffset": 17414656
},
{
"name": "transformer.h.23.attn.c_proj.q_scale",
"shape": [
2048,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 212992,
"byteOffset": 19118592
},
{
"name": "transformer.h.23.attn.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19331584
},
{
"name": "transformer.h.23.ln_2.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19335680
},
{
"name": "transformer.h.23.ln_2.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 19339776
},
{
"name": "transformer.h.23.mlp.c_fc.q_weight",
"shape": [
8192,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6815744,
"byteOffset": 19343872
},
{
"name": "transformer.h.23.mlp.c_fc.q_scale",
"shape": [
8192,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 851968,
"byteOffset": 26159616
},
{
"name": "transformer.h.23.mlp.c_fc.bias",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 27011584
}
],
"md5sum": "1fe7215bed7fabf9108de3cbe34937d3"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 41000960,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
49280,
208
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41000960,
"byteOffset": 0
}
],
"md5sum": "606523ae3635513dfd6f17ee8295e81e"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 12694528,
"records": [
{
"name": "transformer.h.23.mlp.c_proj.q_weight",
"shape": [
2048,
820
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6717440,
"byteOffset": 0
},
{
"name": "transformer.h.23.mlp.c_proj.q_scale",
"shape": [
2048,
205
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 839680,
"byteOffset": 6717440
},
{
"name": "transformer.h.23.mlp.c_proj.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7557120
},
{
"name": "transformer.ln_f.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7561216
},
{
"name": "transformer.ln_f.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 7565312
},
{
"name": "lm_head.q_scale",
"shape": [
49280,
52
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5125120,
"byteOffset": 7569408
}
],
"md5sum": "5fe41842755fedf4d128d8d6a69bf90c"
}
]
}