|
{ |
|
"metadata": { |
|
"ParamSize": 392, |
|
"ParamBytes": 558430208.0, |
|
"BitsPerParam": 3.6444754828825427 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41000960, |
|
"records": [ |
|
{ |
|
"name": "transformer.wte.q_weight", |
|
"shape": [ |
|
49280, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41000960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "606523ae3635513dfd6f17ee8295e81e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30484480, |
|
"records": [ |
|
{ |
|
"name": "transformer.wte.q_scale", |
|
"shape": [ |
|
49280, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5125120, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.wpe.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 5125120 |
|
}, |
|
{ |
|
"name": "transformer.wpe.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 6829056 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7042048 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7046144 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 7050240 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 8967168 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 9206784 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 9211392 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 10915328 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11128320 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11132416 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11136512 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 11140608 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 17956352 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 18808320 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 18824704 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 25542144 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26381824 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26385920 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26390016 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 26394112 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 28311040 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 28550656 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 28555264 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 30259200 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30472192 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30476288 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30480384 |
|
} |
|
], |
|
"md5sum": "8ce048787f9afbc33ba261c2ad6151c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27027968, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 6815744 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 7667712 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 7684096 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 14401536 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15241216 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15245312 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15249408 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 15253504 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 17170432 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 17410048 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 17414656 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 19118592 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19331584 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19335680 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19339776 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 26159616 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27011584 |
|
} |
|
], |
|
"md5sum": "1ab45180b7a59d08ff8d26dc9ffd8051" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31003648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 6717440 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7557120 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7561216 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7565312 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 7569408 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 9486336 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 9725952 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 9730560 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 11434496 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11647488 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11651584 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11655680 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 11659776 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 18475520 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 19327488 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 26061312 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26900992 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26905088 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26909184 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 26913280 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 28830208 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 29069824 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 29074432 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 30778368 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30991360 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30995456 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30999552 |
|
} |
|
], |
|
"md5sum": "8e07bc57bf1f1940f02ec372702969af" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27027968, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 6815744 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 7667712 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 7684096 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 14401536 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15241216 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15245312 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15249408 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 15253504 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 17170432 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 17410048 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 17414656 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 19118592 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19331584 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19335680 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19339776 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 26159616 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27011584 |
|
} |
|
], |
|
"md5sum": "6a28592dd26102827fa7a616df3397d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31003648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 6717440 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7557120 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7561216 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7565312 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 7569408 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 9486336 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 9725952 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 9730560 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 11434496 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11647488 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11651584 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11655680 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 11659776 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 18475520 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 19327488 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 26061312 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26900992 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26905088 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26909184 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 26913280 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 28830208 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 29069824 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 29074432 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 30778368 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30991360 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30995456 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30999552 |
|
} |
|
], |
|
"md5sum": "f06f7a7f3d2f73a2a273265f78ba6843" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27027968, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 6815744 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 7667712 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 7684096 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 14401536 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15241216 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15245312 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15249408 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 15253504 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 17170432 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 17410048 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 17414656 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 19118592 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19331584 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19335680 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19339776 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 26159616 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27011584 |
|
} |
|
], |
|
"md5sum": "a1951a7ba32dc40bb55c0877f64ea66a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31003648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 6717440 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7557120 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7561216 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7565312 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 7569408 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 9486336 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 9725952 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 9730560 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 11434496 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11647488 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11651584 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11655680 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 11659776 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 18475520 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 19327488 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 26061312 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26900992 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26905088 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26909184 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 26913280 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 28830208 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 29069824 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 29074432 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 30778368 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30991360 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30995456 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30999552 |
|
} |
|
], |
|
"md5sum": "f65c7d31ab02fa807d10b4019146e248" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27027968, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 6815744 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 7667712 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 7684096 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 14401536 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15241216 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15245312 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15249408 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 15253504 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 17170432 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 17410048 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 17414656 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 19118592 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19331584 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19335680 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19339776 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 26159616 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27011584 |
|
} |
|
], |
|
"md5sum": "b212963b5c82c5c3c02cf96d37508fd2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31003648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 6717440 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7557120 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7561216 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7565312 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 7569408 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 9486336 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 9725952 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 9730560 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 11434496 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11647488 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11651584 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11655680 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 11659776 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 18475520 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 19327488 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 26061312 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26900992 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26905088 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26909184 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 26913280 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 28830208 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 29069824 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 29074432 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 30778368 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30991360 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30995456 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30999552 |
|
} |
|
], |
|
"md5sum": "a0675f5284e954f8974036cdad0049fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27027968, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 6815744 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 7667712 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 7684096 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 14401536 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15241216 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15245312 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15249408 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 15253504 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 17170432 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 17410048 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 17414656 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 19118592 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19331584 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19335680 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19339776 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 26159616 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27011584 |
|
} |
|
], |
|
"md5sum": "cbe10c01e3fc3e1827997af59d861869" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31003648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 6717440 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7557120 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7561216 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7565312 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 7569408 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 9486336 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 9725952 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 9730560 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 11434496 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11647488 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11651584 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11655680 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 11659776 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 18475520 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 19327488 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 26061312 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26900992 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26905088 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26909184 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 26913280 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 28830208 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 29069824 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 29074432 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 30778368 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30991360 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30995456 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30999552 |
|
} |
|
], |
|
"md5sum": "0dae6e43d6550288de886eceb7dfb53e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27027968, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 6815744 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 7667712 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 7684096 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 14401536 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15241216 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15245312 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15249408 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 15253504 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 17170432 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 17410048 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 17414656 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 19118592 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19331584 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19335680 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19339776 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 26159616 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27011584 |
|
} |
|
], |
|
"md5sum": "4b15f38ee7b07220edc1330d5a1ac1dc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31003648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 6717440 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7557120 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7561216 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7565312 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 7569408 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 9486336 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 9725952 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 9730560 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 11434496 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11647488 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11651584 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11655680 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 11659776 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 18475520 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 19327488 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 26061312 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26900992 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26905088 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26909184 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 26913280 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 28830208 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 29069824 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 29074432 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 30778368 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30991360 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30995456 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30999552 |
|
} |
|
], |
|
"md5sum": "8e1f308884d460271e4a169ee5902ba6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27027968, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 6815744 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 7667712 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 7684096 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 14401536 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15241216 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15245312 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15249408 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 15253504 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 17170432 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 17410048 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 17414656 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 19118592 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19331584 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19335680 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19339776 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 26159616 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27011584 |
|
} |
|
], |
|
"md5sum": "cddf375922759cb0e1ce296fc360285b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31003648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 6717440 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7557120 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7561216 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7565312 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 7569408 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 9486336 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 9725952 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 9730560 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 11434496 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11647488 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11651584 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 11655680 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 11659776 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 18475520 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 19327488 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 26061312 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26900992 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26905088 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 26909184 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 26913280 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 28830208 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 29069824 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 29074432 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 30778368 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30991360 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30995456 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 30999552 |
|
} |
|
], |
|
"md5sum": "4b195d379c0b42d77e987ed6e53bf7a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27027968, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 6815744 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 7667712 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 7684096 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 14401536 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15241216 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15245312 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 15249408 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1916928, |
|
"byteOffset": 15253504 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 239616, |
|
"byteOffset": 17170432 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 17410048 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1703936, |
|
"byteOffset": 17414656 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 212992, |
|
"byteOffset": 19118592 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19331584 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19335680 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 19339776 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6815744, |
|
"byteOffset": 19343872 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 851968, |
|
"byteOffset": 26159616 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27011584 |
|
} |
|
], |
|
"md5sum": "1fe7215bed7fabf9108de3cbe34937d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41000960, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
49280, |
|
208 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41000960, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "606523ae3635513dfd6f17ee8295e81e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 12694528, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
820 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6717440, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
205 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 839680, |
|
"byteOffset": 6717440 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7557120 |
|
}, |
|
{ |
|
"name": "transformer.ln_f.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7561216 |
|
}, |
|
{ |
|
"name": "transformer.ln_f.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 7565312 |
|
}, |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
49280, |
|
52 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5125120, |
|
"byteOffset": 7569408 |
|
} |
|
], |
|
"md5sum": "5fe41842755fedf4d128d8d6a69bf90c" |
|
} |
|
] |
|
} |