diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,8335 @@ +{ + "metadata": { + "ParamSize": 648, + "ParamBytes": 9896353792.0, + "BitsPerParam": 5.004652436926966 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 150994944, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 49152, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 150994944, + "byteOffset": 0 + } + ], + "md5sum": "a1f8ce3e53ff06c74298bc4a4ebed4aa" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.38.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "17e0a2cc25a010d9a281725954af99fb" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.38.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ddc0131046a8e82d3bc431e581fe420e" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.38.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "98f0cbf1ce8a1ff0f62c289905443a2e" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 30769152, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 49152, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + }, + { + "name": "transformer.h.38.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 18874368 + }, + { + "name": "transformer.h.38.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.38.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21245952 + }, + { + "name": "transformer.h.38.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21258240 + }, + { + "name": "transformer.h.38.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 21270528 + }, + { + "name": "transformer.h.38.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 21319680 + }, + { + "name": "transformer.h.38.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 30756864 + } + ], + "md5sum": "b356a1c19101d7459e53b52f47a27fab" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.39.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "29b8f9697706d12efb4c360f13b03244" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.38.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.39.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.39.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.39.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.39.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "f5ae547b303f5ad28cec7da3a9d55143" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.39.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c37267f95b390f392d73836fe62b8569" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.39.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "5b118b3d7fd8b5be3de643e29ba233bd" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.0.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "a8b4333e2beeb76550dd72ba9f6d2118" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.0.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e377affd5800cefd5a0148dda6ea42cc" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.0.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "59c4f9275d1cfa7f90c7e1259cc6708f" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 26309120, + "records": [ + { + "name": "transformer.h.39.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.39.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.39.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.39.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.39.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.39.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.39.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.39.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.39.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11907072 + }, + { + "name": "transformer.ln_f.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21344256 + }, + { + "name": "transformer.ln_f.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21356544 + }, + { + "name": "transformer.h.0.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 21368832 + }, + { + "name": "transformer.h.0.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 21381632 + }, + { + "name": "transformer.h.0.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23839232 + }, + { + "name": "transformer.h.0.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23851520 + }, + { + "name": "transformer.h.0.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26210816 + }, + { + "name": "transformer.h.0.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26223104 + }, + { + "name": "transformer.h.0.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26235392 + }, + { + "name": "transformer.h.0.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26247680 + }, + { + "name": "transformer.h.0.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 26259968 + } + ], + "md5sum": "bef65549b19839f86cc56fcbebec4ef1" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.0.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d6d4a6890452d4c23d71b3c4910612a5" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.1.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "657eec2f43e3bff41abc8c0eb6bb0c5e" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.1.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f6160c24cd8d70fdb9651b61b0c8610e" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.1.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "cb75511185bb967c589b851d275b7d15" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.1.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0d3d42e9bbfd7c5b10dcdd80d796e822" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33276416, + "records": [ + { + "name": "transformer.h.0.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.0.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.1.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.1.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 18899456 + }, + { + "name": "transformer.h.1.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.1.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21369344 + }, + { + "name": "transformer.h.1.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23728640 + }, + { + "name": "transformer.h.1.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23740928 + }, + { + "name": "transformer.h.1.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23753216 + }, + { + "name": "transformer.h.1.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23765504 + }, + { + "name": "transformer.h.1.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23777792 + }, + { + "name": "transformer.h.1.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.1.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33264128 + } + ], + "md5sum": "a5c58df854fce329e0fce5c79ccc0709" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.2.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c549294e9cf331b320f31640c10b7c64" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.1.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.2.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.2.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.2.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "2394ba8ff60c512d3296ca4981fd37c2" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.2.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d805803a760a22be3731d20d756bb126" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.2.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "59a5bd4ad3758674128ff9997c900f2b" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.3.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "a6c803b1db63f7752278022e2add76c1" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.3.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4b0482f33b5a317031a55456a7c01b9b" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.3.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b47a2a0c04f3bcfe9ec3fe9e820499ab" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 26284544, + "records": [ + { + "name": "transformer.h.2.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.2.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.2.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.2.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.2.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.2.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.2.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.2.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11907072 + }, + { + "name": "transformer.h.3.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 21344256 + }, + { + "name": "transformer.h.3.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.3.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23814656 + }, + { + "name": "transformer.h.3.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.3.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26186240 + }, + { + "name": "transformer.h.3.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26198528 + }, + { + "name": "transformer.h.3.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26210816 + }, + { + "name": "transformer.h.3.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26223104 + }, + { + "name": "transformer.h.3.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 26235392 + } + ], + "md5sum": "0c9e571d339f97b9a25c914b38970dc4" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.3.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "2e1735f3a4b27a621481f9b492f53447" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.4.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "d78f188e91b989041e570d423894b01a" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.4.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "ed69094322d4b2b873adc765cf8ff72d" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.4.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "2a5e4daf6b33762b7a01be62b1843b0c" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.4.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c90d7e03e61b729c5fc0672813b14134" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 33276416, + "records": [ + { + "name": "transformer.h.3.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.3.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.3.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.4.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.4.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 18899456 + }, + { + "name": "transformer.h.4.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.4.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21369344 + }, + { + "name": "transformer.h.4.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23728640 + }, + { + "name": "transformer.h.4.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23740928 + }, + { + "name": "transformer.h.4.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23753216 + }, + { + "name": "transformer.h.4.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23765504 + }, + { + "name": "transformer.h.4.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23777792 + }, + { + "name": "transformer.h.4.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.4.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33264128 + } + ], + "md5sum": "41ebf0b64908e277edb2e618da2c9cf7" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.5.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e8db3469a14e9873ac244aaf704a13ab" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.4.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.5.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.5.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.5.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.5.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "d6661b0fcf4135eb6133222dcae7f373" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.5.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "01a9e05d2e07c5ecf2ec4fd77e7acba4" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "transformer.wpe.q_weight", + "shape": [ + 8192, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7a1e7b6a0155d0210a0242f731c3d54f" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 150994944, + "records": [ + { + "name": "transformer.wte.q_weight", + "shape": [ + 49152, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 150994944, + "byteOffset": 0 + } + ], + "md5sum": "a1f8ce3e53ff06c74298bc4a4ebed4aa" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.wte.q_scale", + "shape": [ + 49152, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0653147e2715fd1cc5a5200120f52b6f" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.10.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "717cd73067301b08363602c0e35d6d5e" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.10.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8623474afebbbb291a09d0e65754a187" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.10.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f92b1bcb70460fe057e0d30d9a7b388e" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.10.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "bb123af6b78dd6f9b5591c08005792c8" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29430272, + "records": [ + { + "name": "transformer.h.5.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.5.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.5.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.5.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.5.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.5.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.5.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.wpe.q_scale", + "shape": [ + 8192, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.10.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 15040512 + }, + { + "name": "transformer.h.10.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 15053312 + }, + { + "name": "transformer.h.10.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 17510912 + }, + { + "name": "transformer.h.10.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 17523200 + }, + { + "name": "transformer.h.10.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19882496 + }, + { + "name": "transformer.h.10.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19894784 + }, + { + "name": "transformer.h.10.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19907072 + }, + { + "name": "transformer.h.10.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 19919360 + }, + { + "name": "transformer.h.10.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 19931648 + }, + { + "name": "transformer.h.10.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 19980800 + }, + { + "name": "transformer.h.10.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 29417984 + } + ], + "md5sum": "6503fbfa6d4d0d168fd6b3c7f908aace" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.11.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "53690a0d4aaef30b6745700dce81ac08" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.10.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.11.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.11.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.11.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.11.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "8e8e83647da34fc0667c3184d67b3059" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.11.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b07eff4c1d6d06ec6d80e18b8cebd53e" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.11.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b49f828c560ebc0c16d0bcfb268b5557" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.12.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "df3145595ed10fbfd24dc24fd869e8c6" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.5.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "225b638ba0dffda920dd7a1d7bc5f933" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.6.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "da176670c3040141e9e2c635e134be0e" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33301504, + "records": [ + { + "name": "transformer.h.11.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.11.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.11.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.11.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.11.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.11.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.11.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.11.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.11.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11907072 + }, + { + "name": "transformer.h.12.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 21344256 + }, + { + "name": "transformer.h.12.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.12.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23814656 + }, + { + "name": "transformer.h.12.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.5.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23839232 + }, + { + "name": "transformer.h.5.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23851520 + }, + { + "name": "transformer.h.6.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 33288704 + } + ], + "md5sum": "45e93e9814b25812522f0fc47c126847" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.6.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "64608163c22541be3142fb625b422cc2" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.6.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0dd8b2f4093baf9478ff76198282092e" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 33251328, + "records": [ + { + "name": "transformer.h.6.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 0 + }, + { + "name": "transformer.h.6.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.6.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 2469888 + }, + { + "name": "transformer.h.6.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21344256 + }, + { + "name": "transformer.h.6.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23703552 + }, + { + "name": "transformer.h.6.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23715840 + }, + { + "name": "transformer.h.6.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23728128 + }, + { + "name": "transformer.h.6.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23740416 + }, + { + "name": "transformer.h.6.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23752704 + }, + { + "name": "transformer.h.6.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23801856 + }, + { + "name": "transformer.h.6.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33239040 + } + ], + "md5sum": "f63d8697334eec251d8206635efb1c83" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.7.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "408a452b4efbca2d79fdb5bf0ee655ae" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.6.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.7.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.7.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.7.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.7.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "4a445e33c7931da70d5a29a979040b65" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.7.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4ac49ad9532369907fce4868bc4ff244" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.7.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "494fecd177dd5d4b8a4ee40de9d8dbf0" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.8.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "2d33ecddb0968e7b1dbe8b4d4c936139" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.8.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "9856a59c05a56c7757a1c4d8886311a6" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.8.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9279a0a495df42e036b2fc58a2b7ae97" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 26284544, + "records": [ + { + "name": "transformer.h.7.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.7.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.7.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.7.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.7.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.7.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.7.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.7.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.7.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11907072 + }, + { + "name": "transformer.h.8.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 21344256 + }, + { + "name": "transformer.h.8.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.8.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23814656 + }, + { + "name": "transformer.h.8.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.8.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26186240 + }, + { + "name": "transformer.h.8.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26198528 + }, + { + "name": "transformer.h.8.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26210816 + }, + { + "name": "transformer.h.8.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26223104 + }, + { + "name": "transformer.h.8.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 26235392 + } + ], + "md5sum": "ccac7922f8d0b92c2a6592aca60a398c" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.8.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "8915d2342c55d28371a8aee3d996a02c" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.9.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "5631917a8518461388d0f723f3b9cccf" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.9.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c8bb0c1c0db733ba726403329c37157c" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.9.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "002be25a681c6d9ab7fddc56f5cba9d0" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.9.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4a4b9dd3fb825e9d5c176bfda03b7648" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33276416, + "records": [ + { + "name": "transformer.h.8.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.8.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.8.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.9.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.9.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 18899456 + }, + { + "name": "transformer.h.9.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.9.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21369344 + }, + { + "name": "transformer.h.9.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23728640 + }, + { + "name": "transformer.h.9.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23740928 + }, + { + "name": "transformer.h.9.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23753216 + }, + { + "name": "transformer.h.9.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23765504 + }, + { + "name": "transformer.h.9.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23777792 + }, + { + "name": "transformer.h.9.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.9.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33264128 + } + ], + "md5sum": "c4f74868cdb8908ed7eb750a7aa5497c" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.12.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "63d6dcc2ed5f2b46308b180211219846" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 30756864, + "records": [ + { + "name": "transformer.h.9.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.12.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.12.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 28323840 + }, + { + "name": "transformer.h.12.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 30683136 + }, + { + "name": "transformer.h.12.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 30695424 + }, + { + "name": "transformer.h.12.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 30707712 + } + ], + "md5sum": "9f1c4d2d96e099b599e2979ee0463901" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.12.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e75f991d746e4feb712bc9089e4ad4f0" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.13.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "7907aa46d8cf305fbe9531dd1dd51b75" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.13.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "eb7fff5c9e42f305f3e6bf83e1e13e70" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.13.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "200400fe4eb7c633c9ed22580cc20139" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.13.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "1129f9663dadca9b853f01711761b122" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33276416, + "records": [ + { + "name": "transformer.h.12.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.12.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.13.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.13.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 18899456 + }, + { + "name": "transformer.h.13.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.13.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21369344 + }, + { + "name": "transformer.h.13.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23728640 + }, + { + "name": "transformer.h.13.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23740928 + }, + { + "name": "transformer.h.13.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23753216 + }, + { + "name": "transformer.h.13.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23765504 + }, + { + "name": "transformer.h.13.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23777792 + }, + { + "name": "transformer.h.13.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.13.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33264128 + } + ], + "md5sum": "2187e4f3c5d157f2fafe36d5198afbce" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.14.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1436f44190553727c14212c4462ea3df" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.13.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.14.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.14.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.14.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "af7791cfa3c5041ed92e1a01a1ff5914" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.14.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "120b7e60779129f74289ba9f01597cb8" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.14.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ec93fbecd661af0030dd648615b38069" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.15.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "d53f85b4640a1ce824a03d72e7622ca1" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.15.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "85c6949b4f88c6ac9d2abe03391bc128" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.15.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0e91cc70ef4d5debaef3fcb72cbd76cb" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 26284544, + "records": [ + { + "name": "transformer.h.14.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.14.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.14.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.14.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.14.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.14.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.14.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.14.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11907072 + }, + { + "name": "transformer.h.15.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 21344256 + }, + { + "name": "transformer.h.15.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.15.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23814656 + }, + { + "name": "transformer.h.15.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.15.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26186240 + }, + { + "name": "transformer.h.15.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26198528 + }, + { + "name": "transformer.h.15.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26210816 + }, + { + "name": "transformer.h.15.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26223104 + }, + { + "name": "transformer.h.15.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 26235392 + } + ], + "md5sum": "8cd32e75d754375b649edd0d720687db" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.15.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3e0dcc2203b02bd22045bdff7fa7cf21" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.16.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "5decdaf1f8dcc0df0718fd81589fac8c" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.16.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4f1d489a636017fd8106c4aa3debb9f8" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.16.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "8daab04e62b05513d8827922f392ace9" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.16.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9641926287aa9353519affd490b0dd4b" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 33276416, + "records": [ + { + "name": "transformer.h.15.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.15.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.15.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.16.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.16.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 18899456 + }, + { + "name": "transformer.h.16.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.16.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21369344 + }, + { + "name": "transformer.h.16.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23728640 + }, + { + "name": "transformer.h.16.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23740928 + }, + { + "name": "transformer.h.16.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23753216 + }, + { + "name": "transformer.h.16.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23765504 + }, + { + "name": "transformer.h.16.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23777792 + }, + { + "name": "transformer.h.16.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.16.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33264128 + } + ], + "md5sum": "6affc3ce90d1f2369a5d988f3f7d206d" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.17.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b1165987cfd03932a4b81d774d082668" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.16.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.17.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.17.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.17.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.17.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "635e79fc837ea7949fe073fec4109310" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.17.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "041abd92a6b2a7721836b564bbda3c1f" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.17.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0fdd99f396a9ea911e0605f5f2da1990" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.18.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "16075705d7bb1506b01ffc4a6d60c072" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.18.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "3573ce5c407d642699b52e6fe7142e5d" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.18.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "70ba0841ef7ede7fb24c4de24cf1be15" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 26284544, + "records": [ + { + "name": "transformer.h.17.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.17.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.17.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.17.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.17.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.17.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.17.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.17.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.17.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11907072 + }, + { + "name": "transformer.h.18.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 21344256 + }, + { + "name": "transformer.h.18.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.18.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23814656 + }, + { + "name": "transformer.h.18.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.18.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26186240 + }, + { + "name": "transformer.h.18.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26198528 + }, + { + "name": "transformer.h.18.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26210816 + }, + { + "name": "transformer.h.18.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26223104 + }, + { + "name": "transformer.h.18.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 26235392 + } + ], + "md5sum": "d97288b33559552e5d36780133798d4d" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.18.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "110da2339e6408566494341a4c82dd48" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.19.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "1699da312d4e8759e0a19e9386e68784" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.19.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "58936aeacb32197409832c96372d3c5d" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.19.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f1e7e380c51d70597585b0f05e721746" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.19.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f196b55e163189198cc9412710d32d54" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33276416, + "records": [ + { + "name": "transformer.h.18.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.18.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.18.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.19.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.19.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 18899456 + }, + { + "name": "transformer.h.19.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.19.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21369344 + }, + { + "name": "transformer.h.19.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23728640 + }, + { + "name": "transformer.h.19.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23740928 + }, + { + "name": "transformer.h.19.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23753216 + }, + { + "name": "transformer.h.19.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23765504 + }, + { + "name": "transformer.h.19.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23777792 + }, + { + "name": "transformer.h.19.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.19.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33264128 + } + ], + "md5sum": "e1758fb4ee3137089702d4be5cfc8d0c" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.20.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "11ac7d28aeb3d96eadf85a4c40c25461" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.19.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.20.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.20.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.20.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.20.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "7e2839754a83643ea3f17bed2458fee0" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.20.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e950f5f447253f5ad971162f81f32be7" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.20.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "91f8f3423fc3cee70eafb06214683194" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.21.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "f8065f6cf0542783384b0df7e27e74d9" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.21.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a66f7efe305bfa44ac768b2f2d1554a3" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.21.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "611a2f01834eb4c619c1bca613bdd061" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 26284544, + "records": [ + { + "name": "transformer.h.20.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.20.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.20.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.20.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.20.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.20.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.20.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.20.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.20.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11907072 + }, + { + "name": "transformer.h.21.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 21344256 + }, + { + "name": "transformer.h.21.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.21.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23814656 + }, + { + "name": "transformer.h.21.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.21.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26186240 + }, + { + "name": "transformer.h.21.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26198528 + }, + { + "name": "transformer.h.21.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26210816 + }, + { + "name": "transformer.h.21.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26223104 + }, + { + "name": "transformer.h.21.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 26235392 + } + ], + "md5sum": "6d2f7c4fed47b874405bef92bb8276de" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.21.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e4bbde639174e283a6cc07ed824bc04c" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.22.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "0d38d6416cb3dce9ba9e495ec608c961" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.22.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d3b9f52f17de3ed9a98c905a5d7b3056" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.22.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "185cc76985b5e1eb40f67e91c559083c" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.22.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3e76004fbd640a02eee2772e582e0210" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 33276416, + "records": [ + { + "name": "transformer.h.21.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.21.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.22.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.22.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 18899456 + }, + { + "name": "transformer.h.22.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.22.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21369344 + }, + { + "name": "transformer.h.22.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23728640 + }, + { + "name": "transformer.h.22.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23740928 + }, + { + "name": "transformer.h.22.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23753216 + }, + { + "name": "transformer.h.22.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23765504 + }, + { + "name": "transformer.h.22.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23777792 + }, + { + "name": "transformer.h.22.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.22.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33264128 + } + ], + "md5sum": "f78ed2379a51d95e3d6f4e9d517c0d49" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.23.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8e4150d91e44c011105b5a12824eb5d0" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.22.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.23.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.23.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.23.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "1b2f391eb7339f08c24b680b272cecf2" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.23.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ea2b8ee11895f17f0c62d36bad6a3974" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.23.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a87c9d39dd2f4ae98ddf82781781baac" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.24.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "3cc9dcc31372b204763852879be8ecd1" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.24.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "3b55bf20d31e330d667ddd2492c2bb24" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.24.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "90d28ad8925efa6e5cd7ad0481794e13" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 26284544, + "records": [ + { + "name": "transformer.h.23.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.23.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.23.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.23.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.23.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.23.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.23.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.23.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11907072 + }, + { + "name": "transformer.h.24.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 21344256 + }, + { + "name": "transformer.h.24.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.24.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23814656 + }, + { + "name": "transformer.h.24.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.24.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26186240 + }, + { + "name": "transformer.h.24.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26198528 + }, + { + "name": "transformer.h.24.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26210816 + }, + { + "name": "transformer.h.24.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26223104 + }, + { + "name": "transformer.h.24.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 26235392 + } + ], + "md5sum": "ef0d16395f2f61ae35ab0ea8a347b0c8" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.24.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "564622c1c313e46a28ac75c99ff1883f" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.25.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "bfb6d2a773869a54a44c09f46d0d9312" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.25.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "77d3a7a4a7fd44afea00e31e23d8ff45" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.25.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e1ee86c4de8001c6ef20892799b18012" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.25.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d9016833198578324ea828e3509d7219" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 33276416, + "records": [ + { + "name": "transformer.h.24.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.24.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.24.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.25.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.25.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 18899456 + }, + { + "name": "transformer.h.25.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.25.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21369344 + }, + { + "name": "transformer.h.25.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21381632 + }, + { + "name": "transformer.h.25.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21393920 + }, + { + "name": "transformer.h.25.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23753216 + }, + { + "name": "transformer.h.25.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23765504 + }, + { + "name": "transformer.h.25.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23777792 + }, + { + "name": "transformer.h.25.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.25.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33264128 + } + ], + "md5sum": "e53f95b3e94c840042174c3c6c3eadc3" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.26.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "14f182624b8f5e10c99d6cf6ee25e7b7" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.25.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.26.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.26.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.26.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.26.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "b89850e92825a95a291986ddd4897017" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.26.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "6ce6fdb68ad26eadb2da67eaa1a1f36e" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.26.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e9fa5c2966e5ccfa445c5d84bec36b10" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.27.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "722734886418255fc04e7a3796dc6765" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.27.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d2ac90783e2275189afbe82621906057" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.27.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "976e4fec289b9ba1fc9b52e523348977" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 26284544, + "records": [ + { + "name": "transformer.h.26.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.26.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.26.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.26.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.26.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.26.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.26.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.26.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.26.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11907072 + }, + { + "name": "transformer.h.27.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 21344256 + }, + { + "name": "transformer.h.27.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.27.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23814656 + }, + { + "name": "transformer.h.27.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.27.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26186240 + }, + { + "name": "transformer.h.27.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26198528 + }, + { + "name": "transformer.h.27.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26210816 + }, + { + "name": "transformer.h.27.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26223104 + }, + { + "name": "transformer.h.27.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 26235392 + } + ], + "md5sum": "a66c5b3ea1a7cf52fd2a5a3e7302536d" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.27.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "55f0201f176f406ad44112482555cebf" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.28.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "a8f6a604dfbfcc61a25e3196fd4e0791" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.28.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4c8cb2476d21c193a7d6afece147f83a" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.28.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "5c5e364bec8ddbb83c546bc1f966c4e9" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.28.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e6f3447a65dfc8cc3085ee097b85207a" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 33276416, + "records": [ + { + "name": "transformer.h.27.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.27.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.27.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.28.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.28.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 18899456 + }, + { + "name": "transformer.h.28.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.28.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21369344 + }, + { + "name": "transformer.h.28.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23728640 + }, + { + "name": "transformer.h.28.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23740928 + }, + { + "name": "transformer.h.28.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23753216 + }, + { + "name": "transformer.h.28.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23765504 + }, + { + "name": "transformer.h.28.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23777792 + }, + { + "name": "transformer.h.28.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.28.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33264128 + } + ], + "md5sum": "6efa742897b5b61bbee5f6417e3d19c3" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.29.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "80cf6d2a01b73e2d913fbf2035426fac" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.28.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.29.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.29.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.29.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.29.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "b89b92488fc1ee83a5541bacbe665213" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.29.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b16b54991277ba85488d9e03160aed0b" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.29.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "447f9b53ff319903e32e1113fa9fa683" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.30.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "745745034a1ad129de5fed507a8b5893" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.30.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "2e1b3c20a38d0a50a4a2ee54519fd844" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.30.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b6b3b1c56f256f755831da6512b0b0ae" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 26284544, + "records": [ + { + "name": "transformer.h.29.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.29.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.29.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.29.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.29.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.29.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.29.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.29.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.29.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11907072 + }, + { + "name": "transformer.h.30.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 21344256 + }, + { + "name": "transformer.h.30.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.30.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23814656 + }, + { + "name": "transformer.h.30.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.30.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26186240 + }, + { + "name": "transformer.h.30.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26198528 + }, + { + "name": "transformer.h.30.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26210816 + }, + { + "name": "transformer.h.30.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26223104 + }, + { + "name": "transformer.h.30.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 26235392 + } + ], + "md5sum": "2c8f4580ffbc220dae40684cad072ab3" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.30.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a53a6c512815fe44db791afa92c1dd9d" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.31.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "920dee997cef318438bd4cdf205272a5" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.31.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "99349b3991d094d9cf72ea013d08a552" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.31.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9fc9fb204d9c5c2ff25613c1faabf877" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.31.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ac71bcb1897bba5f94e7560e750895c9" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 33276416, + "records": [ + { + "name": "transformer.h.30.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.30.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.30.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.31.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.31.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 18899456 + }, + { + "name": "transformer.h.31.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.31.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21369344 + }, + { + "name": "transformer.h.31.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23728640 + }, + { + "name": "transformer.h.31.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23740928 + }, + { + "name": "transformer.h.31.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23753216 + }, + { + "name": "transformer.h.31.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23765504 + }, + { + "name": "transformer.h.31.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23777792 + }, + { + "name": "transformer.h.31.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.31.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33264128 + } + ], + "md5sum": "637cacdc791858b47f1aed1b80ecac33" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.32.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "33f7bf416224d49a4bff47074d76e01c" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.31.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.32.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.32.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.32.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.32.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "bd758ba59581eed3daec2bcc552194b4" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.32.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c7c57d06373c8a367cf83685c471756b" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.32.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "07d1948861eb8cfc722dad30b6d12d2c" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.33.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "8a1d986ab2e90093721fb8618de6bbac" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.33.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5ca39ea6c8656e07ad191bf4b59c20c6" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.33.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0d7aa19d0bc9d87c5eeddf9c943451f1" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 26284544, + "records": [ + { + "name": "transformer.h.32.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.32.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.32.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.32.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.32.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.32.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.32.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.32.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.32.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11907072 + }, + { + "name": "transformer.h.33.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 21344256 + }, + { + "name": "transformer.h.33.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.33.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23814656 + }, + { + "name": "transformer.h.33.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.33.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26186240 + }, + { + "name": "transformer.h.33.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26198528 + }, + { + "name": "transformer.h.33.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26210816 + }, + { + "name": "transformer.h.33.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26223104 + }, + { + "name": "transformer.h.33.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 26235392 + } + ], + "md5sum": "9221e022c3869ea97ad8ae3d9925d96c" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.33.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "954bfb39aad84aaa33f8b454af133ec0" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.34.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "968cbf2fef09d43ed0be7add908b6c64" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.34.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1f9923cebf5def5c59b4bcd8e107af7c" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.34.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b56fd5a2333602e51cab57c860ba25a1" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.34.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f98daa90419b679971be19dce9914b1b" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 33276416, + "records": [ + { + "name": "transformer.h.33.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.33.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.33.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.34.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.34.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 18899456 + }, + { + "name": "transformer.h.34.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.34.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21369344 + }, + { + "name": "transformer.h.34.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23728640 + }, + { + "name": "transformer.h.34.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23740928 + }, + { + "name": "transformer.h.34.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23753216 + }, + { + "name": "transformer.h.34.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23765504 + }, + { + "name": "transformer.h.34.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23777792 + }, + { + "name": "transformer.h.34.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.34.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33264128 + } + ], + "md5sum": "c8ed0e9a7baae8fb5fe28f940b2c5ee6" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.35.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "2a2d1d6c0bff53cda69d483db9ba8d5a" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 31580672, + "records": [ + { + "name": "transformer.h.34.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.35.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.35.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.35.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.35.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + } + ], + "md5sum": "b475e338f4f25a1434a75d336001a7cf" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.35.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0d65139928e92d6078a2c300d001ccd2" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.35.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "970b7ef4a1d2d82e9b20d3f40fe4b481" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.36.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "7c99b8fa4db88c6213d6ea09b4a89867" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.36.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0a8c7f7c85fe8bddb3bdca7e510d1c77" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.36.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "7f3b43500f12d916b524d3d76fd88834" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 26284544, + "records": [ + { + "name": "transformer.h.35.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "transformer.h.35.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2359296 + }, + { + "name": "transformer.h.35.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2371584 + }, + { + "name": "transformer.h.35.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2383872 + }, + { + "name": "transformer.h.35.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 2396160 + }, + { + "name": "transformer.h.35.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 2408448 + }, + { + "name": "transformer.h.35.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 2457600 + }, + { + "name": "transformer.h.35.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 11894784 + }, + { + "name": "transformer.h.35.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 11907072 + }, + { + "name": "transformer.h.36.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 21344256 + }, + { + "name": "transformer.h.36.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.36.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23814656 + }, + { + "name": "transformer.h.36.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.36.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26186240 + }, + { + "name": "transformer.h.36.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26198528 + }, + { + "name": "transformer.h.36.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26210816 + }, + { + "name": "transformer.h.36.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 26223104 + }, + { + "name": "transformer.h.36.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 26235392 + } + ], + "md5sum": "5a1f9aefe7a66a640688ff62cdfecb6b" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.36.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "2851e5628b3be9bb4f74869cfdb04779" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 19660800, + "records": [ + { + "name": "transformer.h.37.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 0 + } + ], + "md5sum": "a89730e2fb8439fe7a86296e1bc49646" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.37.attn.c_proj.q_weight", + "shape": [ + 6144, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4da09cf2bad748c2e2c2c26e5af154a6" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.37.mlp.c_fc.q_weight", + "shape": [ + 24576, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "764c3f2b70f78eb6ecfc3b2859f1c4e7" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "transformer.h.37.mlp.c_proj.q_weight", + "shape": [ + 6144, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c8c522da37d853a43585383da755b701" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 33276416, + "records": [ + { + "name": "transformer.h.36.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.36.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.36.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 9449472 + }, + { + "name": "transformer.h.37.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.37.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 18899456 + }, + { + "name": "transformer.h.37.attn.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 21357056 + }, + { + "name": "transformer.h.37.attn.c_proj.q_scale", + "shape": [ + 6144, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 21369344 + }, + { + "name": "transformer.h.37.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23728640 + }, + { + "name": "transformer.h.37.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23740928 + }, + { + "name": "transformer.h.37.ln_2.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23753216 + }, + { + "name": "transformer.h.37.ln_2.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23765504 + }, + { + "name": "transformer.h.37.mlp.c_fc.bias", + "shape": [ + 24576 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 49152, + "byteOffset": 23777792 + }, + { + "name": "transformer.h.37.mlp.c_fc.q_scale", + "shape": [ + 24576, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 23826944 + }, + { + "name": "transformer.h.37.mlp.c_proj.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 33264128 + } + ], + "md5sum": "baccf9dffa7e198fe69c57e182969580" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 31592960, + "records": [ + { + "name": "transformer.h.37.mlp.c_proj.q_scale", + "shape": [ + 6144, + 768 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 9437184, + "byteOffset": 0 + }, + { + "name": "transformer.h.38.attn.c_attn.bias", + "shape": [ + 6400 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12800, + "byteOffset": 9437184 + }, + { + "name": "transformer.h.38.attn.c_attn.q_weight", + "shape": [ + 6400, + 768 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 19660800, + "byteOffset": 9449984 + }, + { + "name": "transformer.h.38.attn.c_attn.q_scale", + "shape": [ + 6400, + 192 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2457600, + "byteOffset": 29110784 + }, + { + "name": "transformer.h.38.ln_1.bias", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31568384 + }, + { + "name": "transformer.h.38.ln_1.weight", + "shape": [ + 6144 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31580672 + } + ], + "md5sum": "ace8556f39596971d713e75e29c805dd" + } + ] +} \ No newline at end of file