{ "metadata": { "ParamSize": 648, "ParamBytes": 9896353792.0, "BitsPerParam": 5.004652436926966 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "lm_head.q_weight", "shape": [ 49152, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "a1f8ce3e53ff06c74298bc4a4ebed4aa" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.38.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "17e0a2cc25a010d9a281725954af99fb" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.38.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ddc0131046a8e82d3bc431e581fe420e" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.38.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "98f0cbf1ce8a1ff0f62c289905443a2e" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 30769152, "records": [ { "name": "lm_head.q_scale", "shape": [ 49152, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 }, { "name": "transformer.h.38.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "transformer.h.38.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "transformer.h.38.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21245952 }, { "name": "transformer.h.38.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21258240 }, { "name": "transformer.h.38.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 21270528 }, { "name": "transformer.h.38.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 21319680 }, { "name": "transformer.h.38.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30756864 } ], "md5sum": "b356a1c19101d7459e53b52f47a27fab" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.39.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "29b8f9697706d12efb4c360f13b03244" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.38.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.39.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.39.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.39.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.39.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "f5ae547b303f5ad28cec7da3a9d55143" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.39.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c37267f95b390f392d73836fe62b8569" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.39.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5b118b3d7fd8b5be3de643e29ba233bd" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.0.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "a8b4333e2beeb76550dd72ba9f6d2118" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.0.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e377affd5800cefd5a0148dda6ea42cc" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.0.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "59c4f9275d1cfa7f90c7e1259cc6708f" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 26309120, "records": [ { "name": "transformer.h.39.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.39.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.39.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.39.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.39.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.39.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.39.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.39.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.39.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.ln_f.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21344256 }, { "name": "transformer.ln_f.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21356544 }, { "name": "transformer.h.0.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21368832 }, { "name": "transformer.h.0.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21381632 }, { "name": "transformer.h.0.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23839232 }, { "name": "transformer.h.0.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23851520 }, { "name": "transformer.h.0.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.0.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.0.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26235392 }, { "name": "transformer.h.0.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26247680 }, { "name": "transformer.h.0.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26259968 } ], "md5sum": "bef65549b19839f86cc56fcbebec4ef1" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.0.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d6d4a6890452d4c23d71b3c4910612a5" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.1.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "657eec2f43e3bff41abc8c0eb6bb0c5e" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.1.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f6160c24cd8d70fdb9651b61b0c8610e" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.1.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cb75511185bb967c589b851d275b7d15" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.1.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0d3d42e9bbfd7c5b10dcdd80d796e822" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.0.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.0.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.0.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.1.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.1.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.1.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.1.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.1.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.1.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.1.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.1.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.1.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.1.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.1.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "a5c58df854fce329e0fce5c79ccc0709" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.2.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c549294e9cf331b320f31640c10b7c64" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.1.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.2.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.2.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.2.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.2.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "2394ba8ff60c512d3296ca4981fd37c2" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.2.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d805803a760a22be3731d20d756bb126" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.2.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "59a5bd4ad3758674128ff9997c900f2b" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.3.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "a6c803b1db63f7752278022e2add76c1" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.3.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4b0482f33b5a317031a55456a7c01b9b" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.3.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b47a2a0c04f3bcfe9ec3fe9e820499ab" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.2.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.2.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.2.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.2.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.2.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.2.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.2.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.2.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.2.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.3.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.3.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.3.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.3.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.3.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.3.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.3.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.3.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.3.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "0c9e571d339f97b9a25c914b38970dc4" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.3.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2e1735f3a4b27a621481f9b492f53447" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.4.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "d78f188e91b989041e570d423894b01a" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.4.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ed69094322d4b2b873adc765cf8ff72d" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.4.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2a5e4daf6b33762b7a01be62b1843b0c" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.4.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c90d7e03e61b729c5fc0672813b14134" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.3.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.3.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.3.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.4.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.4.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.4.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.4.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.4.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.4.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.4.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.4.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.4.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.4.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.4.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "41ebf0b64908e277edb2e618da2c9cf7" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.5.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e8db3469a14e9873ac244aaf704a13ab" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.4.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.5.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.5.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.5.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.5.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "d6661b0fcf4135eb6133222dcae7f373" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.5.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "01a9e05d2e07c5ecf2ec4fd77e7acba4" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.wpe.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7a1e7b6a0155d0210a0242f731c3d54f" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "transformer.wte.q_weight", "shape": [ 49152, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "a1f8ce3e53ff06c74298bc4a4ebed4aa" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.wte.q_scale", "shape": [ 49152, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0653147e2715fd1cc5a5200120f52b6f" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.10.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "717cd73067301b08363602c0e35d6d5e" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.10.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8623474afebbbb291a09d0e65754a187" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.10.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f92b1bcb70460fe057e0d30d9a7b388e" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.10.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bb123af6b78dd6f9b5591c08005792c8" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29430272, "records": [ { "name": "transformer.h.5.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.5.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.5.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.5.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.5.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.5.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.5.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.wpe.q_scale", "shape": [ 8192, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11894784 }, { "name": "transformer.h.10.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 15040512 }, { "name": "transformer.h.10.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 15053312 }, { "name": "transformer.h.10.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17510912 }, { "name": "transformer.h.10.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 17523200 }, { "name": "transformer.h.10.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19882496 }, { "name": "transformer.h.10.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19894784 }, { "name": "transformer.h.10.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19907072 }, { "name": "transformer.h.10.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19919360 }, { "name": "transformer.h.10.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19931648 }, { "name": "transformer.h.10.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 19980800 }, { "name": "transformer.h.10.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 29417984 } ], "md5sum": "6503fbfa6d4d0d168fd6b3c7f908aace" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.11.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "53690a0d4aaef30b6745700dce81ac08" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.10.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.11.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.11.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.11.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.11.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "8e8e83647da34fc0667c3184d67b3059" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.11.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b07eff4c1d6d06ec6d80e18b8cebd53e" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.11.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b49f828c560ebc0c16d0bcfb268b5557" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.12.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "df3145595ed10fbfd24dc24fd869e8c6" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.5.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "225b638ba0dffda920dd7a1d7bc5f933" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.6.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "da176670c3040141e9e2c635e134be0e" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33301504, "records": [ { "name": "transformer.h.11.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.11.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.11.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.11.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.11.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.11.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.11.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.11.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.11.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.12.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.12.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.12.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.12.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23826944 }, { "name": "transformer.h.5.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23839232 }, { "name": "transformer.h.5.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23851520 }, { "name": "transformer.h.6.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 33288704 } ], "md5sum": "45e93e9814b25812522f0fc47c126847" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.6.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "64608163c22541be3142fb625b422cc2" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.6.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0dd8b2f4093baf9478ff76198282092e" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33251328, "records": [ { "name": "transformer.h.6.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 0 }, { "name": "transformer.h.6.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2457600 }, { "name": "transformer.h.6.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 2469888 }, { "name": "transformer.h.6.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21344256 }, { "name": "transformer.h.6.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23703552 }, { "name": "transformer.h.6.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23715840 }, { "name": "transformer.h.6.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728128 }, { "name": "transformer.h.6.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740416 }, { "name": "transformer.h.6.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23752704 }, { "name": "transformer.h.6.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23801856 }, { "name": "transformer.h.6.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33239040 } ], "md5sum": "f63d8697334eec251d8206635efb1c83" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.7.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "408a452b4efbca2d79fdb5bf0ee655ae" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.6.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.7.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.7.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.7.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.7.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "4a445e33c7931da70d5a29a979040b65" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.7.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4ac49ad9532369907fce4868bc4ff244" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.7.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "494fecd177dd5d4b8a4ee40de9d8dbf0" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.8.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "2d33ecddb0968e7b1dbe8b4d4c936139" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.8.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9856a59c05a56c7757a1c4d8886311a6" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.8.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9279a0a495df42e036b2fc58a2b7ae97" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.7.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.7.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.7.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.7.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.7.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.7.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.7.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.7.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.7.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.8.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.8.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.8.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.8.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.8.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.8.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.8.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.8.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.8.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "ccac7922f8d0b92c2a6592aca60a398c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.8.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8915d2342c55d28371a8aee3d996a02c" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.9.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "5631917a8518461388d0f723f3b9cccf" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.9.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c8bb0c1c0db733ba726403329c37157c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.9.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "002be25a681c6d9ab7fddc56f5cba9d0" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.9.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4a4b9dd3fb825e9d5c176bfda03b7648" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.8.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.8.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.8.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.9.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.9.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.9.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.9.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.9.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.9.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.9.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.9.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.9.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.9.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.9.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "c4f74868cdb8908ed7eb750a7aa5497c" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.12.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "63d6dcc2ed5f2b46308b180211219846" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30756864, "records": [ { "name": "transformer.h.9.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.12.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.12.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 9449472 }, { "name": "transformer.h.12.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 28323840 }, { "name": "transformer.h.12.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30683136 }, { "name": "transformer.h.12.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30695424 }, { "name": "transformer.h.12.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 30707712 } ], "md5sum": "9f1c4d2d96e099b599e2979ee0463901" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.12.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e75f991d746e4feb712bc9089e4ad4f0" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.13.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "7907aa46d8cf305fbe9531dd1dd51b75" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.13.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "eb7fff5c9e42f305f3e6bf83e1e13e70" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.13.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "200400fe4eb7c633c9ed22580cc20139" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.13.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1129f9663dadca9b853f01711761b122" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.12.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.12.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.12.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.13.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.13.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.13.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.13.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.13.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.13.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.13.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.13.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.13.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.13.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.13.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "2187e4f3c5d157f2fafe36d5198afbce" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.14.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1436f44190553727c14212c4462ea3df" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.13.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.14.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.14.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.14.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.14.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "af7791cfa3c5041ed92e1a01a1ff5914" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.14.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "120b7e60779129f74289ba9f01597cb8" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.14.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ec93fbecd661af0030dd648615b38069" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.15.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "d53f85b4640a1ce824a03d72e7622ca1" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.15.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "85c6949b4f88c6ac9d2abe03391bc128" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.15.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0e91cc70ef4d5debaef3fcb72cbd76cb" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.14.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.14.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.14.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.14.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.14.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.14.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.14.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.14.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.14.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.15.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.15.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.15.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.15.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.15.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.15.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.15.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.15.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.15.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "8cd32e75d754375b649edd0d720687db" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.15.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3e0dcc2203b02bd22045bdff7fa7cf21" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.16.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "5decdaf1f8dcc0df0718fd81589fac8c" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.16.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4f1d489a636017fd8106c4aa3debb9f8" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.16.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8daab04e62b05513d8827922f392ace9" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.16.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9641926287aa9353519affd490b0dd4b" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.15.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.15.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.15.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.16.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.16.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.16.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.16.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.16.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.16.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.16.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.16.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.16.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.16.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.16.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "6affc3ce90d1f2369a5d988f3f7d206d" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.17.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b1165987cfd03932a4b81d774d082668" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.16.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.17.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.17.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.17.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.17.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "635e79fc837ea7949fe073fec4109310" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.17.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "041abd92a6b2a7721836b564bbda3c1f" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.17.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0fdd99f396a9ea911e0605f5f2da1990" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.18.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "16075705d7bb1506b01ffc4a6d60c072" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.18.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3573ce5c407d642699b52e6fe7142e5d" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.18.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "70ba0841ef7ede7fb24c4de24cf1be15" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.17.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.17.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.17.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.17.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.17.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.17.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.17.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.17.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.17.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.18.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.18.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.18.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.18.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.18.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.18.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.18.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.18.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.18.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "d97288b33559552e5d36780133798d4d" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.18.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "110da2339e6408566494341a4c82dd48" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.19.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "1699da312d4e8759e0a19e9386e68784" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.19.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "58936aeacb32197409832c96372d3c5d" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.19.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f1e7e380c51d70597585b0f05e721746" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.19.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f196b55e163189198cc9412710d32d54" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.18.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.18.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.18.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.19.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.19.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.19.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.19.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.19.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.19.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.19.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.19.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.19.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.19.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.19.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "e1758fb4ee3137089702d4be5cfc8d0c" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.20.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "11ac7d28aeb3d96eadf85a4c40c25461" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.19.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.20.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.20.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.20.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.20.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "7e2839754a83643ea3f17bed2458fee0" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.20.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e950f5f447253f5ad971162f81f32be7" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.20.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "91f8f3423fc3cee70eafb06214683194" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.21.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "f8065f6cf0542783384b0df7e27e74d9" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.21.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a66f7efe305bfa44ac768b2f2d1554a3" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.21.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "611a2f01834eb4c619c1bca613bdd061" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.20.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.20.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.20.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.20.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.20.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.20.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.20.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.20.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.20.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.21.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.21.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.21.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.21.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.21.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.21.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.21.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.21.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.21.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "6d2f7c4fed47b874405bef92bb8276de" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.21.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e4bbde639174e283a6cc07ed824bc04c" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.22.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "0d38d6416cb3dce9ba9e495ec608c961" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.22.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d3b9f52f17de3ed9a98c905a5d7b3056" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.22.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "185cc76985b5e1eb40f67e91c559083c" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.22.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3e76004fbd640a02eee2772e582e0210" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.21.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.21.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.21.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.22.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.22.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.22.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.22.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.22.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.22.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.22.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.22.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.22.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.22.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.22.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "f78ed2379a51d95e3d6f4e9d517c0d49" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.23.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8e4150d91e44c011105b5a12824eb5d0" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.22.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.23.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.23.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.23.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.23.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "1b2f391eb7339f08c24b680b272cecf2" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.23.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ea2b8ee11895f17f0c62d36bad6a3974" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.23.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a87c9d39dd2f4ae98ddf82781781baac" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.24.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "3cc9dcc31372b204763852879be8ecd1" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.24.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3b55bf20d31e330d667ddd2492c2bb24" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.24.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "90d28ad8925efa6e5cd7ad0481794e13" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.23.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.23.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.23.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.23.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.23.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.23.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.23.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.23.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.23.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.24.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.24.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.24.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.24.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.24.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.24.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.24.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.24.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.24.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "ef0d16395f2f61ae35ab0ea8a347b0c8" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.24.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "564622c1c313e46a28ac75c99ff1883f" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.25.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "bfb6d2a773869a54a44c09f46d0d9312" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.25.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "77d3a7a4a7fd44afea00e31e23d8ff45" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.25.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e1ee86c4de8001c6ef20892799b18012" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.25.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d9016833198578324ea828e3509d7219" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.24.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.24.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.24.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.25.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.25.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.25.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.25.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21369344 }, { "name": "transformer.h.25.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21381632 }, { "name": "transformer.h.25.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21393920 }, { "name": "transformer.h.25.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.25.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.25.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.25.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.25.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "e53f95b3e94c840042174c3c6c3eadc3" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.26.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "14f182624b8f5e10c99d6cf6ee25e7b7" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.25.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.26.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.26.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.26.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.26.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "b89850e92825a95a291986ddd4897017" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.26.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6ce6fdb68ad26eadb2da67eaa1a1f36e" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.26.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e9fa5c2966e5ccfa445c5d84bec36b10" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.27.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "722734886418255fc04e7a3796dc6765" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.27.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d2ac90783e2275189afbe82621906057" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.27.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "976e4fec289b9ba1fc9b52e523348977" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.26.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.26.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.26.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.26.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.26.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.26.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.26.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.26.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.26.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.27.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.27.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.27.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.27.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.27.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.27.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.27.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.27.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.27.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "a66c5b3ea1a7cf52fd2a5a3e7302536d" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.27.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "55f0201f176f406ad44112482555cebf" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.28.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "a8f6a604dfbfcc61a25e3196fd4e0791" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.28.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4c8cb2476d21c193a7d6afece147f83a" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.28.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5c5e364bec8ddbb83c546bc1f966c4e9" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.28.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e6f3447a65dfc8cc3085ee097b85207a" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.27.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.27.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.27.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.28.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.28.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.28.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.28.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.28.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.28.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.28.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.28.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.28.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.28.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.28.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "6efa742897b5b61bbee5f6417e3d19c3" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.29.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "80cf6d2a01b73e2d913fbf2035426fac" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.28.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.29.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.29.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.29.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.29.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "b89b92488fc1ee83a5541bacbe665213" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.29.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b16b54991277ba85488d9e03160aed0b" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.29.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "447f9b53ff319903e32e1113fa9fa683" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.30.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "745745034a1ad129de5fed507a8b5893" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.30.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2e1b3c20a38d0a50a4a2ee54519fd844" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.30.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b6b3b1c56f256f755831da6512b0b0ae" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.29.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.29.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.29.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.29.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.29.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.29.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.29.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.29.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.29.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.30.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.30.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.30.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.30.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.30.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.30.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.30.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.30.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.30.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "2c8f4580ffbc220dae40684cad072ab3" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.30.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a53a6c512815fe44db791afa92c1dd9d" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.31.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "920dee997cef318438bd4cdf205272a5" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.31.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "99349b3991d094d9cf72ea013d08a552" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.31.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9fc9fb204d9c5c2ff25613c1faabf877" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.31.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ac71bcb1897bba5f94e7560e750895c9" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.30.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.30.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.30.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.31.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.31.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.31.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.31.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.31.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.31.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.31.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.31.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.31.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.31.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.31.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "637cacdc791858b47f1aed1b80ecac33" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.32.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "33f7bf416224d49a4bff47074d76e01c" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.31.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.32.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.32.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.32.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.32.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "bd758ba59581eed3daec2bcc552194b4" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.32.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c7c57d06373c8a367cf83685c471756b" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.32.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "07d1948861eb8cfc722dad30b6d12d2c" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.33.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "8a1d986ab2e90093721fb8618de6bbac" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.33.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5ca39ea6c8656e07ad191bf4b59c20c6" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.33.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0d7aa19d0bc9d87c5eeddf9c943451f1" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.32.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.32.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.32.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.32.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.32.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.32.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.32.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.32.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.32.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.33.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.33.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.33.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.33.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.33.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.33.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.33.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.33.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.33.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "9221e022c3869ea97ad8ae3d9925d96c" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.33.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "954bfb39aad84aaa33f8b454af133ec0" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.34.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "968cbf2fef09d43ed0be7add908b6c64" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.34.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1f9923cebf5def5c59b4bcd8e107af7c" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.34.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b56fd5a2333602e51cab57c860ba25a1" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.34.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f98daa90419b679971be19dce9914b1b" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.33.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.33.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.33.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.34.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.34.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.34.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.34.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.34.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.34.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.34.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.34.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.34.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.34.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.34.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "c8ed0e9a7baae8fb5fe28f940b2c5ee6" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.35.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2a2d1d6c0bff53cda69d483db9ba8d5a" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.34.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.35.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.35.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.35.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.35.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "b475e338f4f25a1434a75d336001a7cf" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.35.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0d65139928e92d6078a2c300d001ccd2" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.35.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "970b7ef4a1d2d82e9b20d3f40fe4b481" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.36.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "7c99b8fa4db88c6213d6ea09b4a89867" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.36.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0a8c7f7c85fe8bddb3bdca7e510d1c77" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.36.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7f3b43500f12d916b524d3d76fd88834" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.35.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.35.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.35.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.35.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.35.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.35.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.35.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.35.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.35.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.36.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.36.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.36.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.36.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.36.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.36.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.36.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.36.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.36.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "5a1f9aefe7a66a640688ff62cdfecb6b" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.36.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2851e5628b3be9bb4f74869cfdb04779" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.37.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "a89730e2fb8439fe7a86296e1bc49646" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.37.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4da09cf2bad748c2e2c2c26e5af154a6" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.37.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "764c3f2b70f78eb6ecfc3b2859f1c4e7" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.37.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c8c522da37d853a43585383da755b701" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.36.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.36.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.36.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.37.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.37.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.37.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.37.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.37.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.37.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.37.ln_2.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.37.ln_2.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.37.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.37.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.37.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "baccf9dffa7e198fe69c57e182969580" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 31592960, "records": [ { "name": "transformer.h.37.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.38.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.38.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.38.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.38.ln_1.bias", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 }, { "name": "transformer.h.38.ln_1.weight", "shape": [ 6144 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31580672 } ], "md5sum": "ace8556f39596971d713e75e29c805dd" } ] }