{ "metadata": { "ParamSize": 171, "ParamBytes": 3673657344.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 622329856, "records": [ { "name": "lm_head.weight", "shape": [ 151936, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 622329856, "byteOffset": 0 } ], "md5sum": "83c85f622c9c298d6de5cd6d58789736" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 622329856, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 151936, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 622329856, "byteOffset": 0 } ], "md5sum": "206ebbfb149647701dbeb218436b7d8f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "09c446e956ab6d4a147bd40f2de7bea5" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0292eee7b82a5b618c0ccb911837b204" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2bc2c7e7d9bbab5dae35f6341df3d5d4" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "223e4f9d1bc3272485ebef2b95d1f1f5" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b6d9f3d75d83c86c7a34fabaf5a622dd" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 30973952, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 4096 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 22548480 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 22552576 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 22564864 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30953472 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30957568 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30961664 } ], "md5sum": "64a2cf6c36a6ef5aaf9da3622d3eb666" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5facbe96fed8417fd67054abece53006" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1511e9775e7b1fb03f1096d48b60301e" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "801c9302955ade10a6a38eea1cdd663e" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "79cfed7dc713b299f61b18dee20376dc" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2839deaf8ac8792833e0a170eb677773" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "56dbcb95902d10b2165a68fdd689a853" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "750ea469ec1790dd12fc5df63245f36c" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "53d7deafe41c7cfd26cdde144f001b84" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "d5ab1c2914b7b5fb9463ae06c17ad62f" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c818bbd34597e1adc930a21bc1281901" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8cb5f8dc0b1df94cdc4758972b8bcb79" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "81ea46aba7c4040e488eb1f53eb85395" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c3fe540355374d554e380b67b9130e6d" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6acccdb7b8091022a954b0629df7a8d7" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "1c4e8ee8dfc04f527e3b04441cee0621" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "445a73f350f95f8c6aaf470a0648aa41" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b5f6ab967a5a9748e5b9ae7a939352bd" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "d7692d14fdec212853e321f5849025fd" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c9d31c64260450a8c520a43ddba6ccda" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c1404d446281f41b4ab60e586ebe42af" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "33d7615d5499f44e57ec03c95ab2b5d8" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b000699d16122be07193b657fce01e3b" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7edca338357f18752de1c9a894e7653f" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "9854d2759e328a0230b7d8f7623dc54a" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9c8b9e17393d828088467705cb6f6825" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f37dd3700560972a3e624e0ee4beb0f8" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "7a8438b7fdb6b3fee35d8372f1f9df80" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c376d9da8152e4b47d955418afcfa6f5" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bdc65263820cdccb8af74c212d500cf4" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "17316b02f50d215eee2c183605a28455" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "a6739cd336bcfca9fcf694f4c859e785" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "52be86752d8b85e4eab1baf5ff2dc06a" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "45412cba0d7d92875a4b66dceaaee000" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f376a2446673e2884216fb480defe9a4" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "58e5c1da262bdbc658646cdb25586e50" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "9cb5fb5ca40f3c44b0892935edb590c8" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e8dc7a080286b770f450c7b8b9233d31" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c3a070435a1738702f4fa97639b1295f" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "dc5afcb289c1193402fc5a0d3ebe3b1d" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "1e6b00aec599d853df3baa265d4a1690" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bdb912d45a0626cb91a0da0abbfb84a8" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "de46fd346a15c41b42e67b3d0ae673c9" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "4cb92d3031b8a8d7e4944d6d6d26af3b" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8b7e38cd6f60865a1db8034d27eecaf0" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "8f04f524cd1d8946116a5751ebfbc85f" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "401108ec17a0721237c7893d86405968" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "131f98baea5d8cb84ede2ad0bdad81e4" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "8c25341395fac238ab0fe7eb58432000" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "1b4069779763ce8e9ded3be2093aec82" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b7202732924be475d50445bd09148ebc" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "87d54674433056fb7111710ce9ee1da1" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e6d8a89c39719c8894be8677b84c34bd" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "914748c73b153225064d5473a71bf480" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "194c16b3e951936f2662d8060d5f7163" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "0d2929123d475dddbf5348e7c3bca32e" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4c16d2f3f5523fedb224ee355179c4bc" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "a4ae8bed4420f2ccc51bd6d26b80d703" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ac79b2cd07190cc8f27f6d39447d8008" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9c74c6e83c316439766dbabd9b4bfe61" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "189cf5ee330a2d298e3bf44a5d79c926" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f82bde1caaf69d79b46d16c9f3630470" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "04b1b10c63c030e8b4461af7528fddba" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "dd4892ac9920883590d988c7a442ec0e" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "396e3d7bfa6e3f6cda19abbb40b9c81f" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 6144, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a9fe1c41e97698293f83322043b116ab" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 30953472, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 30937088 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30941184 } ], "md5sum": "9c66125e43d4a5db730097c03f9afc4a" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 8392704, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 } ], "md5sum": "9139274d063a3faf7d47ce9aaa9431a7" } ] }