{ "metadata": { "ParamSize": 392, "ParamBytes": 558430208.0, "BitsPerParam": 3.6444754828825427 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 41000960, "records": [ { "name": "transformer.wte.q_weight", "shape": [ 49280, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41000960, "byteOffset": 0 } ], "md5sum": "606523ae3635513dfd6f17ee8295e81e" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 30484480, "records": [ { "name": "transformer.wte.q_scale", "shape": [ 49280, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5125120, "byteOffset": 0 }, { "name": "transformer.wpe.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 5125120 }, { "name": "transformer.wpe.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 6829056 }, { "name": "transformer.h.0.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7042048 }, { "name": "transformer.h.0.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7046144 }, { "name": "transformer.h.0.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 7050240 }, { "name": "transformer.h.0.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 8967168 }, { "name": "transformer.h.0.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9206784 }, { "name": "transformer.h.0.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 9211392 }, { "name": "transformer.h.0.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 10915328 }, { "name": "transformer.h.0.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11128320 }, { "name": "transformer.h.0.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11132416 }, { "name": "transformer.h.0.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11136512 }, { "name": "transformer.h.0.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 11140608 }, { "name": "transformer.h.0.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 17956352 }, { "name": "transformer.h.0.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18808320 }, { "name": "transformer.h.0.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 18824704 }, { "name": "transformer.h.0.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 25542144 }, { "name": "transformer.h.0.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26381824 }, { "name": "transformer.h.1.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26385920 }, { "name": "transformer.h.1.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26390016 }, { "name": "transformer.h.1.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 26394112 }, { "name": "transformer.h.1.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 28311040 }, { "name": "transformer.h.1.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 28550656 }, { "name": "transformer.h.1.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 28555264 }, { "name": "transformer.h.1.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 30259200 }, { "name": "transformer.h.1.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30472192 }, { "name": "transformer.h.1.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30476288 }, { "name": "transformer.h.1.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30480384 } ], "md5sum": "8ce048787f9afbc33ba261c2ad6151c2" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 27027968, "records": [ { "name": "transformer.h.1.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 0 }, { "name": "transformer.h.1.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 6815744 }, { "name": "transformer.h.1.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7667712 }, { "name": "transformer.h.1.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 7684096 }, { "name": "transformer.h.1.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 14401536 }, { "name": "transformer.h.1.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15241216 }, { "name": "transformer.h.2.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15245312 }, { "name": "transformer.h.2.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15249408 }, { "name": "transformer.h.2.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 15253504 }, { "name": "transformer.h.2.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 17170432 }, { "name": "transformer.h.2.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 17410048 }, { "name": "transformer.h.2.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 17414656 }, { "name": "transformer.h.2.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19118592 }, { "name": "transformer.h.2.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19331584 }, { "name": "transformer.h.2.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19335680 }, { "name": "transformer.h.2.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19339776 }, { "name": "transformer.h.2.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 19343872 }, { "name": "transformer.h.2.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 26159616 }, { "name": "transformer.h.2.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27011584 } ], "md5sum": "1ab45180b7a59d08ff8d26dc9ffd8051" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 31003648, "records": [ { "name": "transformer.h.2.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 0 }, { "name": "transformer.h.2.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 6717440 }, { "name": "transformer.h.2.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7557120 }, { "name": "transformer.h.3.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7561216 }, { "name": "transformer.h.3.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7565312 }, { "name": "transformer.h.3.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 7569408 }, { "name": "transformer.h.3.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 9486336 }, { "name": "transformer.h.3.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9725952 }, { "name": "transformer.h.3.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 9730560 }, { "name": "transformer.h.3.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 11434496 }, { "name": "transformer.h.3.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11647488 }, { "name": "transformer.h.3.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11651584 }, { "name": "transformer.h.3.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11655680 }, { "name": "transformer.h.3.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 11659776 }, { "name": "transformer.h.3.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 18475520 }, { "name": "transformer.h.3.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19327488 }, { "name": "transformer.h.3.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 19343872 }, { "name": "transformer.h.3.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26061312 }, { "name": "transformer.h.3.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26900992 }, { "name": "transformer.h.4.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26905088 }, { "name": "transformer.h.4.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26909184 }, { "name": "transformer.h.4.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 26913280 }, { "name": "transformer.h.4.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 28830208 }, { "name": "transformer.h.4.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 29069824 }, { "name": "transformer.h.4.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 29074432 }, { "name": "transformer.h.4.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 30778368 }, { "name": "transformer.h.4.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30991360 }, { "name": "transformer.h.4.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30995456 }, { "name": "transformer.h.4.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30999552 } ], "md5sum": "8e07bc57bf1f1940f02ec372702969af" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 27027968, "records": [ { "name": "transformer.h.4.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 0 }, { "name": "transformer.h.4.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 6815744 }, { "name": "transformer.h.4.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7667712 }, { "name": "transformer.h.4.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 7684096 }, { "name": "transformer.h.4.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 14401536 }, { "name": "transformer.h.4.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15241216 }, { "name": "transformer.h.5.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15245312 }, { "name": "transformer.h.5.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15249408 }, { "name": "transformer.h.5.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 15253504 }, { "name": "transformer.h.5.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 17170432 }, { "name": "transformer.h.5.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 17410048 }, { "name": "transformer.h.5.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 17414656 }, { "name": "transformer.h.5.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19118592 }, { "name": "transformer.h.5.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19331584 }, { "name": "transformer.h.5.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19335680 }, { "name": "transformer.h.5.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19339776 }, { "name": "transformer.h.5.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 19343872 }, { "name": "transformer.h.5.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 26159616 }, { "name": "transformer.h.5.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27011584 } ], "md5sum": "6a28592dd26102827fa7a616df3397d5" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31003648, "records": [ { "name": "transformer.h.5.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 0 }, { "name": "transformer.h.5.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 6717440 }, { "name": "transformer.h.5.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7557120 }, { "name": "transformer.h.6.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7561216 }, { "name": "transformer.h.6.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7565312 }, { "name": "transformer.h.6.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 7569408 }, { "name": "transformer.h.6.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 9486336 }, { "name": "transformer.h.6.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9725952 }, { "name": "transformer.h.6.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 9730560 }, { "name": "transformer.h.6.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 11434496 }, { "name": "transformer.h.6.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11647488 }, { "name": "transformer.h.6.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11651584 }, { "name": "transformer.h.6.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11655680 }, { "name": "transformer.h.6.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 11659776 }, { "name": "transformer.h.6.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 18475520 }, { "name": "transformer.h.6.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19327488 }, { "name": "transformer.h.6.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 19343872 }, { "name": "transformer.h.6.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26061312 }, { "name": "transformer.h.6.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26900992 }, { "name": "transformer.h.7.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26905088 }, { "name": "transformer.h.7.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26909184 }, { "name": "transformer.h.7.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 26913280 }, { "name": "transformer.h.7.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 28830208 }, { "name": "transformer.h.7.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 29069824 }, { "name": "transformer.h.7.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 29074432 }, { "name": "transformer.h.7.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 30778368 }, { "name": "transformer.h.7.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30991360 }, { "name": "transformer.h.7.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30995456 }, { "name": "transformer.h.7.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30999552 } ], "md5sum": "f06f7a7f3d2f73a2a273265f78ba6843" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 27027968, "records": [ { "name": "transformer.h.7.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 0 }, { "name": "transformer.h.7.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 6815744 }, { "name": "transformer.h.7.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7667712 }, { "name": "transformer.h.7.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 7684096 }, { "name": "transformer.h.7.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 14401536 }, { "name": "transformer.h.7.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15241216 }, { "name": "transformer.h.8.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15245312 }, { "name": "transformer.h.8.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15249408 }, { "name": "transformer.h.8.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 15253504 }, { "name": "transformer.h.8.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 17170432 }, { "name": "transformer.h.8.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 17410048 }, { "name": "transformer.h.8.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 17414656 }, { "name": "transformer.h.8.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19118592 }, { "name": "transformer.h.8.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19331584 }, { "name": "transformer.h.8.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19335680 }, { "name": "transformer.h.8.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19339776 }, { "name": "transformer.h.8.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 19343872 }, { "name": "transformer.h.8.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 26159616 }, { "name": "transformer.h.8.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27011584 } ], "md5sum": "a1951a7ba32dc40bb55c0877f64ea66a" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 31003648, "records": [ { "name": "transformer.h.8.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 0 }, { "name": "transformer.h.8.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 6717440 }, { "name": "transformer.h.8.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7557120 }, { "name": "transformer.h.9.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7561216 }, { "name": "transformer.h.9.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7565312 }, { "name": "transformer.h.9.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 7569408 }, { "name": "transformer.h.9.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 9486336 }, { "name": "transformer.h.9.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9725952 }, { "name": "transformer.h.9.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 9730560 }, { "name": "transformer.h.9.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 11434496 }, { "name": "transformer.h.9.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11647488 }, { "name": "transformer.h.9.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11651584 }, { "name": "transformer.h.9.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11655680 }, { "name": "transformer.h.9.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 11659776 }, { "name": "transformer.h.9.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 18475520 }, { "name": "transformer.h.9.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19327488 }, { "name": "transformer.h.9.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 19343872 }, { "name": "transformer.h.9.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26061312 }, { "name": "transformer.h.9.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26900992 }, { "name": "transformer.h.10.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26905088 }, { "name": "transformer.h.10.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26909184 }, { "name": "transformer.h.10.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 26913280 }, { "name": "transformer.h.10.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 28830208 }, { "name": "transformer.h.10.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 29069824 }, { "name": "transformer.h.10.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 29074432 }, { "name": "transformer.h.10.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 30778368 }, { "name": "transformer.h.10.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30991360 }, { "name": "transformer.h.10.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30995456 }, { "name": "transformer.h.10.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30999552 } ], "md5sum": "f65c7d31ab02fa807d10b4019146e248" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 27027968, "records": [ { "name": "transformer.h.10.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 0 }, { "name": "transformer.h.10.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 6815744 }, { "name": "transformer.h.10.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7667712 }, { "name": "transformer.h.10.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 7684096 }, { "name": "transformer.h.10.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 14401536 }, { "name": "transformer.h.10.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15241216 }, { "name": "transformer.h.11.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15245312 }, { "name": "transformer.h.11.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15249408 }, { "name": "transformer.h.11.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 15253504 }, { "name": "transformer.h.11.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 17170432 }, { "name": "transformer.h.11.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 17410048 }, { "name": "transformer.h.11.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 17414656 }, { "name": "transformer.h.11.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19118592 }, { "name": "transformer.h.11.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19331584 }, { "name": "transformer.h.11.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19335680 }, { "name": "transformer.h.11.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19339776 }, { "name": "transformer.h.11.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 19343872 }, { "name": "transformer.h.11.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 26159616 }, { "name": "transformer.h.11.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27011584 } ], "md5sum": "b212963b5c82c5c3c02cf96d37508fd2" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 31003648, "records": [ { "name": "transformer.h.11.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 0 }, { "name": "transformer.h.11.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 6717440 }, { "name": "transformer.h.11.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7557120 }, { "name": "transformer.h.12.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7561216 }, { "name": "transformer.h.12.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7565312 }, { "name": "transformer.h.12.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 7569408 }, { "name": "transformer.h.12.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 9486336 }, { "name": "transformer.h.12.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9725952 }, { "name": "transformer.h.12.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 9730560 }, { "name": "transformer.h.12.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 11434496 }, { "name": "transformer.h.12.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11647488 }, { "name": "transformer.h.12.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11651584 }, { "name": "transformer.h.12.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11655680 }, { "name": "transformer.h.12.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 11659776 }, { "name": "transformer.h.12.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 18475520 }, { "name": "transformer.h.12.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19327488 }, { "name": "transformer.h.12.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 19343872 }, { "name": "transformer.h.12.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26061312 }, { "name": "transformer.h.12.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26900992 }, { "name": "transformer.h.13.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26905088 }, { "name": "transformer.h.13.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26909184 }, { "name": "transformer.h.13.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 26913280 }, { "name": "transformer.h.13.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 28830208 }, { "name": "transformer.h.13.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 29069824 }, { "name": "transformer.h.13.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 29074432 }, { "name": "transformer.h.13.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 30778368 }, { "name": "transformer.h.13.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30991360 }, { "name": "transformer.h.13.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30995456 }, { "name": "transformer.h.13.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30999552 } ], "md5sum": "a0675f5284e954f8974036cdad0049fb" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 27027968, "records": [ { "name": "transformer.h.13.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 0 }, { "name": "transformer.h.13.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 6815744 }, { "name": "transformer.h.13.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7667712 }, { "name": "transformer.h.13.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 7684096 }, { "name": "transformer.h.13.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 14401536 }, { "name": "transformer.h.13.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15241216 }, { "name": "transformer.h.14.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15245312 }, { "name": "transformer.h.14.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15249408 }, { "name": "transformer.h.14.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 15253504 }, { "name": "transformer.h.14.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 17170432 }, { "name": "transformer.h.14.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 17410048 }, { "name": "transformer.h.14.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 17414656 }, { "name": "transformer.h.14.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19118592 }, { "name": "transformer.h.14.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19331584 }, { "name": "transformer.h.14.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19335680 }, { "name": "transformer.h.14.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19339776 }, { "name": "transformer.h.14.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 19343872 }, { "name": "transformer.h.14.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 26159616 }, { "name": "transformer.h.14.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27011584 } ], "md5sum": "cbe10c01e3fc3e1827997af59d861869" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 31003648, "records": [ { "name": "transformer.h.14.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 0 }, { "name": "transformer.h.14.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 6717440 }, { "name": "transformer.h.14.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7557120 }, { "name": "transformer.h.15.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7561216 }, { "name": "transformer.h.15.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7565312 }, { "name": "transformer.h.15.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 7569408 }, { "name": "transformer.h.15.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 9486336 }, { "name": "transformer.h.15.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9725952 }, { "name": "transformer.h.15.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 9730560 }, { "name": "transformer.h.15.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 11434496 }, { "name": "transformer.h.15.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11647488 }, { "name": "transformer.h.15.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11651584 }, { "name": "transformer.h.15.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11655680 }, { "name": "transformer.h.15.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 11659776 }, { "name": "transformer.h.15.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 18475520 }, { "name": "transformer.h.15.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19327488 }, { "name": "transformer.h.15.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 19343872 }, { "name": "transformer.h.15.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26061312 }, { "name": "transformer.h.15.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26900992 }, { "name": "transformer.h.16.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26905088 }, { "name": "transformer.h.16.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26909184 }, { "name": "transformer.h.16.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 26913280 }, { "name": "transformer.h.16.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 28830208 }, { "name": "transformer.h.16.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 29069824 }, { "name": "transformer.h.16.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 29074432 }, { "name": "transformer.h.16.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 30778368 }, { "name": "transformer.h.16.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30991360 }, { "name": "transformer.h.16.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30995456 }, { "name": "transformer.h.16.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30999552 } ], "md5sum": "0dae6e43d6550288de886eceb7dfb53e" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 27027968, "records": [ { "name": "transformer.h.16.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 0 }, { "name": "transformer.h.16.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 6815744 }, { "name": "transformer.h.16.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7667712 }, { "name": "transformer.h.16.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 7684096 }, { "name": "transformer.h.16.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 14401536 }, { "name": "transformer.h.16.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15241216 }, { "name": "transformer.h.17.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15245312 }, { "name": "transformer.h.17.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15249408 }, { "name": "transformer.h.17.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 15253504 }, { "name": "transformer.h.17.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 17170432 }, { "name": "transformer.h.17.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 17410048 }, { "name": "transformer.h.17.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 17414656 }, { "name": "transformer.h.17.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19118592 }, { "name": "transformer.h.17.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19331584 }, { "name": "transformer.h.17.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19335680 }, { "name": "transformer.h.17.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19339776 }, { "name": "transformer.h.17.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 19343872 }, { "name": "transformer.h.17.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 26159616 }, { "name": "transformer.h.17.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27011584 } ], "md5sum": "4b15f38ee7b07220edc1330d5a1ac1dc" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 31003648, "records": [ { "name": "transformer.h.17.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 0 }, { "name": "transformer.h.17.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 6717440 }, { "name": "transformer.h.17.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7557120 }, { "name": "transformer.h.18.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7561216 }, { "name": "transformer.h.18.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7565312 }, { "name": "transformer.h.18.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 7569408 }, { "name": "transformer.h.18.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 9486336 }, { "name": "transformer.h.18.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9725952 }, { "name": "transformer.h.18.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 9730560 }, { "name": "transformer.h.18.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 11434496 }, { "name": "transformer.h.18.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11647488 }, { "name": "transformer.h.18.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11651584 }, { "name": "transformer.h.18.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11655680 }, { "name": "transformer.h.18.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 11659776 }, { "name": "transformer.h.18.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 18475520 }, { "name": "transformer.h.18.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19327488 }, { "name": "transformer.h.18.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 19343872 }, { "name": "transformer.h.18.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26061312 }, { "name": "transformer.h.18.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26900992 }, { "name": "transformer.h.19.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26905088 }, { "name": "transformer.h.19.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26909184 }, { "name": "transformer.h.19.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 26913280 }, { "name": "transformer.h.19.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 28830208 }, { "name": "transformer.h.19.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 29069824 }, { "name": "transformer.h.19.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 29074432 }, { "name": "transformer.h.19.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 30778368 }, { "name": "transformer.h.19.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30991360 }, { "name": "transformer.h.19.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30995456 }, { "name": "transformer.h.19.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30999552 } ], "md5sum": "8e1f308884d460271e4a169ee5902ba6" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 27027968, "records": [ { "name": "transformer.h.19.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 0 }, { "name": "transformer.h.19.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 6815744 }, { "name": "transformer.h.19.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7667712 }, { "name": "transformer.h.19.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 7684096 }, { "name": "transformer.h.19.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 14401536 }, { "name": "transformer.h.19.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15241216 }, { "name": "transformer.h.20.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15245312 }, { "name": "transformer.h.20.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15249408 }, { "name": "transformer.h.20.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 15253504 }, { "name": "transformer.h.20.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 17170432 }, { "name": "transformer.h.20.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 17410048 }, { "name": "transformer.h.20.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 17414656 }, { "name": "transformer.h.20.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19118592 }, { "name": "transformer.h.20.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19331584 }, { "name": "transformer.h.20.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19335680 }, { "name": "transformer.h.20.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19339776 }, { "name": "transformer.h.20.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 19343872 }, { "name": "transformer.h.20.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 26159616 }, { "name": "transformer.h.20.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27011584 } ], "md5sum": "cddf375922759cb0e1ce296fc360285b" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 31003648, "records": [ { "name": "transformer.h.20.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 0 }, { "name": "transformer.h.20.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 6717440 }, { "name": "transformer.h.20.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7557120 }, { "name": "transformer.h.21.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7561216 }, { "name": "transformer.h.21.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7565312 }, { "name": "transformer.h.21.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 7569408 }, { "name": "transformer.h.21.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 9486336 }, { "name": "transformer.h.21.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9725952 }, { "name": "transformer.h.21.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 9730560 }, { "name": "transformer.h.21.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 11434496 }, { "name": "transformer.h.21.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11647488 }, { "name": "transformer.h.21.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11651584 }, { "name": "transformer.h.21.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 11655680 }, { "name": "transformer.h.21.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 11659776 }, { "name": "transformer.h.21.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 18475520 }, { "name": "transformer.h.21.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19327488 }, { "name": "transformer.h.21.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 19343872 }, { "name": "transformer.h.21.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26061312 }, { "name": "transformer.h.21.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26900992 }, { "name": "transformer.h.22.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26905088 }, { "name": "transformer.h.22.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 26909184 }, { "name": "transformer.h.22.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 26913280 }, { "name": "transformer.h.22.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 28830208 }, { "name": "transformer.h.22.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 29069824 }, { "name": "transformer.h.22.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 29074432 }, { "name": "transformer.h.22.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 30778368 }, { "name": "transformer.h.22.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30991360 }, { "name": "transformer.h.22.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30995456 }, { "name": "transformer.h.22.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30999552 } ], "md5sum": "4b195d379c0b42d77e987ed6e53bf7a6" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 27027968, "records": [ { "name": "transformer.h.22.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 0 }, { "name": "transformer.h.22.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 6815744 }, { "name": "transformer.h.22.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7667712 }, { "name": "transformer.h.22.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 7684096 }, { "name": "transformer.h.22.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 14401536 }, { "name": "transformer.h.22.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15241216 }, { "name": "transformer.h.23.ln_1.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15245312 }, { "name": "transformer.h.23.ln_1.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15249408 }, { "name": "transformer.h.23.attn.c_attn.q_weight", "shape": [ 2304, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1916928, "byteOffset": 15253504 }, { "name": "transformer.h.23.attn.c_attn.q_scale", "shape": [ 2304, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 239616, "byteOffset": 17170432 }, { "name": "transformer.h.23.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 17410048 }, { "name": "transformer.h.23.attn.c_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 17414656 }, { "name": "transformer.h.23.attn.c_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19118592 }, { "name": "transformer.h.23.attn.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19331584 }, { "name": "transformer.h.23.ln_2.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19335680 }, { "name": "transformer.h.23.ln_2.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 19339776 }, { "name": "transformer.h.23.mlp.c_fc.q_weight", "shape": [ 8192, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6815744, "byteOffset": 19343872 }, { "name": "transformer.h.23.mlp.c_fc.q_scale", "shape": [ 8192, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 851968, "byteOffset": 26159616 }, { "name": "transformer.h.23.mlp.c_fc.bias", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27011584 } ], "md5sum": "1fe7215bed7fabf9108de3cbe34937d3" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 41000960, "records": [ { "name": "lm_head.q_weight", "shape": [ 49280, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41000960, "byteOffset": 0 } ], "md5sum": "606523ae3635513dfd6f17ee8295e81e" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 12694528, "records": [ { "name": "transformer.h.23.mlp.c_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 0 }, { "name": "transformer.h.23.mlp.c_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 6717440 }, { "name": "transformer.h.23.mlp.c_proj.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7557120 }, { "name": "transformer.ln_f.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7561216 }, { "name": "transformer.ln_f.bias", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 7565312 }, { "name": "lm_head.q_scale", "shape": [ 49280, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5125120, "byteOffset": 7569408 } ], "md5sum": "5fe41842755fedf4d128d8d6a69bf90c" } ] }