diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3470 @@ +{ + "metadata": { + "ParamSize": 254, + "ParamBytes": 6171877376.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 622329856, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 151936, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 622329856, + "byteOffset": 0 + } + ], + "md5sum": "f58c0acb41b85d8e7d5f5c53d9501526" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "0bfaa9620d061038eb70bf4c0188c447" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "45649b9a88a54d3f210ec7c6b762aee8" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "1fb6637c50eec204f716a6e2b7004948" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "826a05e4a27388aafc5e4a489f6ab271" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 29386752, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 4096 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8192 + }, + { + "name": "model.layers.0.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 13312 + }, + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10499072 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18887680 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18891776 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18895872 + }, + { + "name": "model.layers.1.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18900992 + } + ], + "md5sum": "4965e678f27fd5965a9dc4b529721a61" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "9dfa1476617ee80bce853a1d126ef23a" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "e497cf0aad47d8ef2cbae595c67ba1e3" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "73bff5ef420b7df2698ca90bf2d4df51" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "b67ddd9c92d023512982fa3830353421" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 27289600, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8396800 + }, + { + "name": "model.layers.10.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8401920 + }, + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18887680 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27276288 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27280384 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 27284480 + } + ], + "md5sum": "1688f84e4f7922f76817b6bb23ac1384" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "de14a5223492bc23ce31d025d7344400" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "b6672bf8dba2cac2b56ff2a25b32e42a" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 29373440, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18882560 + }, + { + "name": "model.layers.12.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18887680 + } + ], + "md5sum": "9eae6a11b421f3ce61171309b12aed24" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "c0a03bfde4dbb8e009498dfecb3e8265" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "002e4817886977ee630d7042d6d5a09b" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ace1c86724325188be11dff0efc4ee5c" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "dd6107ab54b454867c2be25833ceddf5" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 27289600, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8396800 + }, + { + "name": "model.layers.13.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8401920 + }, + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18887680 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27276288 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27280384 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 27284480 + } + ], + "md5sum": "29896a6487425c50ce734d63c64a0045" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "b9813a99205422ef528dd478f0e0426e" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "1a9ec90c169d1163127c0060ade7e033" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29373440, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18882560 + }, + { + "name": "model.layers.15.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18887680 + } + ], + "md5sum": "b815ce34bc6d755e86aaa243d10e785f" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "5bfced7df7530f5b0e2b5dd35d9e483e" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "a4c0ff835bf6654a4adde03177309a07" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "c5eec349a32db23bd55b8ec57df3ab41" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "88771c127d151a4f3980537e800dc1a7" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 27289600, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8396800 + }, + { + "name": "model.layers.16.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8401920 + }, + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18887680 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27276288 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27280384 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 27284480 + } + ], + "md5sum": "a61edfef11446ca99e769e1df08e1267" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ac2fdf4213976668e5aa80b1d262b200" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "73d57042875e2e07e4b0f609e017c735" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 29373440, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18882560 + }, + { + "name": "model.layers.18.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18887680 + } + ], + "md5sum": "3abb7cf0a44a0b73e5137b61d1dabddd" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "c0f9b1d551c5f23d140dbdb87314eb3c" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "04f9bd1796ef023f16c8f879227f18d8" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "7bc4817941104c75718246d974ed5193" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "8e9a521dbceb7b63b33a46b7c5280dae" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 27289600, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8396800 + }, + { + "name": "model.layers.19.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8401920 + }, + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18887680 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27276288 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27280384 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 27284480 + } + ], + "md5sum": "db8679b3c3a963a2ecd1d8291d4a0cab" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "5f68575c10b75b3a9c0e4f1170f561f6" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "74c5a55fb43017a54caf745862f3a5ff" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29373440, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18882560 + }, + { + "name": "model.layers.20.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18887680 + } + ], + "md5sum": "67cbfe7a7e0885134b290b5c53a399b9" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "214347f2fb8081a59b2c862cc9219e30" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "e4a4529f892e439e12c31bfb13046593" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "1a1ca8b9926654a6a926464cd0a8c964" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 27289600, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8396800 + }, + { + "name": "model.layers.21.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8401920 + }, + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18887680 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27276288 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27280384 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 27284480 + } + ], + "md5sum": "b3606dd298a4e5bb260e7d70987b6694" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "fcdec8529d65079960749f437f2a4f79" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "b85856695ac2cc6b23b43984d4246946" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 29373440, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18882560 + }, + { + "name": "model.layers.4.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18887680 + } + ], + "md5sum": "00ae1c8d4f65260416161a405e1481ee" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "0ddf1424188e8231e7ba3d3136bb35d4" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "098629b017a248912fa17e9032ccbe42" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "2fa47fd530a7896a5d3f164798004f5d" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "02c980ee910a4030b28037c0ffbc0242" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 27289600, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8396800 + }, + { + "name": "model.layers.5.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8401920 + }, + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18887680 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27276288 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27280384 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 27284480 + } + ], + "md5sum": "9d325e8a401014b4354d693f03b067f1" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "d7d9998f5372b2a62e6700e14dba76ab" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "d5f943fef4c70ad775575f0b9272269a" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 29373440, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18882560 + }, + { + "name": "model.layers.7.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18887680 + } + ], + "md5sum": "821e893e7e7007d1b2c54ad1a1e5e409" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "b5c48f3ece0b879d6a2552db05b19cfc" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "9b819fb21b6a3b345e1c27f7e6d71e9a" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "5f909663574916741ceed1c39d858df6" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "779156a50a2d480c5b03179af05548e6" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 27289600, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8396800 + }, + { + "name": "model.layers.8.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8401920 + }, + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18887680 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27276288 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27280384 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 27284480 + } + ], + "md5sum": "4f1d844111ca65fdb37ee3a1a7e84765" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "16fb473b08fb56dd88d676eedb48193b" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8c0f189b6ff437d8519774cec2d07753" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "88e2891c863f2fe0c52f2d56ba4f795e" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 29373440, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18882560 + }, + { + "name": "model.layers.22.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18887680 + } + ], + "md5sum": "2a7200dee16602ffda10b634e5f5b6e7" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "306407ec30a7cf3667b48023b469eb8c" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "3e9c86504bb7f37a115b73b2a89ad9ef" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "6aa6c27ceec822684eb2947e6ab6c844" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "776b5452c9e1347a040b3197973a5bd7" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 27289600, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8396800 + }, + { + "name": "model.layers.23.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8401920 + }, + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18887680 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27276288 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27280384 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 27284480 + } + ], + "md5sum": "b8cded6bb57028d8caee5d16685eda56" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8e00da96cd0492374f4c113556feadd8" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "5ff80e592ca02bcc840f2c45ffb377a3" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 29373440, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18882560 + }, + { + "name": "model.layers.25.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18887680 + } + ], + "md5sum": "6000b1252c865c1244a2bae136d7ef20" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "f8f1f022388cc0617f8d740c0fb7dafc" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "4b4e9ff85306691dec1ebc118da45aab" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8567f854abc6dec3f694600ef5de6cb1" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "306fd8e1d1fb539cb4b0243206b70cf9" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 27289600, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8396800 + }, + { + "name": "model.layers.26.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8401920 + }, + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18887680 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27276288 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27280384 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 27284480 + } + ], + "md5sum": "96cd5edca290c894a0b8556be3db83fa" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "6d7eec6feefa599db05df37bdafd0942" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "9c5eafa3521197bbe7e228bd348cb615" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 29373440, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18882560 + }, + { + "name": "model.layers.28.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18887680 + } + ], + "md5sum": "e686501bcee88364812b9aa417379861" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "25a1442240f3548f9ed920138d6f2302" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "649c542a832e9f48252b1e9837f813fd" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "d331c2101d88423c9f1666a2c683c22b" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "e4975ad33de1bf0d90a1473dc4bf930a" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 27289600, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8396800 + }, + { + "name": "model.layers.29.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8401920 + }, + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18887680 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27276288 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27280384 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 27284480 + } + ], + "md5sum": "6bc588c7e64f3cc48aa39089ab27d9dc" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "854d99689ea537c3f5538c3512908a21" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "729f4f7b87185ef60bce343e60e219d9" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 29373440, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18882560 + }, + { + "name": "model.layers.31.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18887680 + } + ], + "md5sum": "c6eb60e2bb0ae2c5fe0856c13dc51f34" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "59b8945b6e2c7116a9e142c52b4f2a7d" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "4860f118c1b7a21d144ffedf118af89d" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "3eced39182906479bc26b68da5f80148" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "0fe17a873e5d718536793729d0e2712e" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 27289600, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8396800 + }, + { + "name": "model.layers.32.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8401920 + }, + { + "name": "model.layers.32.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18887680 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27276288 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27280384 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 27284480 + } + ], + "md5sum": "40c0dfa72b018a22d11d30a45e3bb978" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "9b73caae98cd7e47ebb08c804f468df2" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "92f1f90019615d78f7f6ef17873e477d" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 29373440, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10485760 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18874368 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18878464 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 18882560 + }, + { + "name": "model.layers.34.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 18887680 + } + ], + "md5sum": "4806c3a4257b09a7a8dff09505f59605" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.weight", + "shape": [ + 2048, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "1de221761f59f15b8d24b05258fc6f89" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 90177536, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.weight", + "shape": [ + 22016, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 90177536, + "byteOffset": 0 + } + ], + "md5sum": "8f79a06439059bdb5f769b209f66aa4c" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 27280384, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 8396800 + }, + { + "name": "model.layers.35.self_attn.c_attn.weight", + "shape": [ + 2560, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10485760, + "byteOffset": 8401920 + }, + { + "name": "model.layers.35.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18887680 + }, + { + "name": "model.norm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27276288 + } + ], + "md5sum": "6e3f4ecf3c3a86b1f33b4642e6de9869" + } + ] +} \ No newline at end of file