diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3651 @@ +{ + "metadata": { + "ParamSize": 260, + "ParamBytes": 5590886400.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 257556480, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 50304, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 257556480, + "byteOffset": 0 + } + ], + "md5sum": "3fe5bd4a4f9f2a4eaf2f66d00d66f6cd" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 257556480, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 50304, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 257556480, + "byteOffset": 0 + } + ], + "md5sum": "efecdd96f440bdec0e9be5ccf8c8164c" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "71be470d9b114197ec0ca56fab78083b" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3fe568c7389b479b609328ca89bfecf2" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "a1226dfeb64b9481daee061324dc381e" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2f197a6ce40998eb5474131ded168adb" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "bb6f995175bfed48d0464e0a9c0bbcb7" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f0776e80d3a83241b07bdc3555411b64" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "1974260cd01d9f272b191d3c1e01345d" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "55c4431ab7393cda7aea61d49c89b4fb" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "52744abe66a1554f1335c411add0c1a9" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 26275840, + "records": [ + { + "name": "model.layers.0.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 5120 + }, + { + "name": "model.layers.0.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 10240 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 15360 + }, + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 20480 + }, + { + "name": "model.layers.1.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13127680 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13132800 + }, + { + "name": "model.layers.1.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13137920 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13143040 + }, + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13148160 + }, + { + "name": "model.layers.10.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26255360 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26260480 + }, + { + "name": "model.layers.10.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26265600 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26270720 + } + ], + "md5sum": "94a3babeb9c835014b02a24264133900" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "810e1a6020f421b3f1d5bbd8eebdf974" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4407252f8a3449bb91dd69a7038ad262" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "33a9f9235aed6270b50c3d2523ef95a6" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "758b9b43c35de5be520404cf05de20a4" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "be885d395d86d4170aa763d672429508" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "e25426cfa0305a8210071fbe98859931" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.11.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.11.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.12.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.12.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "92d6af18eea838627920f2a045480205" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "faefea1a12bbdc090755c8af574a1ad0" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "40ffc41479d59c87d139a3470dc1ea95" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "2c3c44a16b485a15ca9a504770e6cabf" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "deb7301bfbb90740f115f446bdc04e9d" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4a6e0586d024d0b03456d97c74cb62cf" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f1f83b10eb6d6c17454245210e8c399f" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.13.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.14.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.14.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "db91bb3f779cb2d336c1d8ec179b929b" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "b2340442f562a0bd1f1d344e3079323e" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "287cce23fdf23b96fb1fa99f698b6c26" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0978069f8da4b77f4cf21a2ab7476e97" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "349d5bb4db6fe9bf12cbd9a32332fb54" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e47da433611d265386d834f590b196f5" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "c8b2296a5a6c88bef3ebab07312f9c60" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.15.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.15.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.16.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.16.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "ef170619b82a5fc45d68300661ae7baa" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5cd0395e7241e8b79c2070f45af38c83" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "592f38cc1adad4e18ae56f4e153f00fc" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b6097f02cf6ab113ff590d125e99770e" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6f943240804d977c2b0320ccaefaad40" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "02bc28913e07829457e3800088d5cf33" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "105b18d625284662101b1a5bd3d5c852" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.17.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.18.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.18.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "4e859b519940019435f8a998201b0faf" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "639a99d7a62ca7a829e487dd56745c14" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "dc92e7805f350afbe4f465f373e1cf77" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "72b27f22195bc7f63a4ccc20c2703df3" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "1ab73e63b63a718d9bdaade10798accf" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9816dd0d1e94dfb11a54b630d32c233c" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ec71fb8a799cda30c8ce9650ea109ada" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.19.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.2.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.2.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "83fdaa708bb3dd9b025a8ba6b6e361c7" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "72ee7902e25bd0204dd6e5a4520a2bdb" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "68a5536ad8686de85ec406676b80898f" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "af5393b4eb9a3e8daaddf809fcbe2295" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "8702bfb09cf196f0d5b3252a4f09cf57" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "df5b96a84f3a9c18b9e30513bceeb625" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "83c1cf274d68d7a9aa08688e303115c5" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.20.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.21.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.21.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "c3b8f636c85bc2a910c3f037887b15b2" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6b6960945be3afa236db3b4127d3222f" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ca8d85f1e79044b1b9a2659a45015273" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b7ad9a2a539b5f24d4c9d788f01ccbc8" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "8ca2a7663b9a252e4dc8bf9ef55f4a7f" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e464fa88c63bcded773c0303fee4f324" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "13c500e5c03a7ba9d0a443c8aa11fd46" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.22.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.22.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.23.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.23.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "1a8d4a29106236515980bff6de849064" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "9102cc6f709ec133f4bc59e73b178e01" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "bb0042e58f92a354109a76640fc276ac" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "157a47b48ae2a67344056bdc946c3f1a" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "11eef381c0e39e0ab7ccdb3e8b95e9ce" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e39569881ebc677c99d53a24859f39c3" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "2d7ae1daeacaccb9cb219ac43d8cf2ab" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.24.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.25.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.25.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "f96f3ddc4f2cba0e2fdb7b6fc3fb15e7" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "9bd487194084cf2c29bce22216119fd5" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "31c70f5d3b143a8d92d912ed15be4031" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1a195eb40893265026028b6b59e00707" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "bf6ef05bf824ec62d4e3bfeff70cc1f4" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "065aef07386c786d14898421ccd045a4" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "530cb6cffbb73b22dcbe80970cb5f70d" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.26.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.27.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.27.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "156e83b98ddcd2b973a7acf86f111359" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "8ea4b3cf13aed5ef72f8a5f06e27ab38" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4b5d84e1d62b8af239a4b81b49f2077b" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "a236affe43869798e15917de7115f26a" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "90b66a559afa774ffa065010fb844791" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "310c1aa8972e995010e7b4fd9bb566b6" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4ba6e45669d26ba44680c9d3f09ade3d" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.28.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.28.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.29.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.29.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "e2a6068a4f0e53b5e5bb79fdeee4fa89" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c20779e64b9c917a9c72301e258c24a3" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ce801d0695e8ced72de0742bf401a38b" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "8eb23c0c02032bf001bbdf619189543a" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "1ea4b92122baaea1cffeb56e4895001d" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "681a5cfd6a06e10613e3e475b2a4ac3f" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "cab1a01c061932e3a9502c828f5ca264" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.3.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.3.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.30.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.30.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "e3a2dd026b4b9098a09b18ddf8c0e9b5" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "01f46d485e23daa18d1fbdcb8b665000" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "953efb8803b9808aa6145165d4c32f64" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "ac7d57bb9da0334d7c0e81e2f0324dc7" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "fd0855613114ee04ffb7f899b2181e32" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e900a53824df53faab5e4da3e611ce26" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "3e1bbd34897b6ed08b3419c8dfec78fd" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.31.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.4.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.4.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "83b1ce06070efb9ed80f8f32b36e19c6" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "afa45f36d1f28f07245316cbbbf01122" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "58356763043ba228edf906a84d0b31d6" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0a5cec9a17c762bb340198743c8913e3" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "f53ea634720e9e83dca397d71c16c2e3" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "1fbc191907f4cfadf7f50045a9d9087c" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "5d6686cb230657d72e1abfab21892afc" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.5.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.6.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.6.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "d62934d2e3fc5eb80995a93b39bdb9a4" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "e19ded8005f3d5bd827f316da9834c66" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e06bec900e03b16732cfe24ba383fcd9" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "f8445d1f43562d6443bef65865b8e985" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "ceda8635165632304d055599d976e7e9" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "884b876299824ec6b06233f60988b2dd" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d9178a0fd1973b8db4beed3376a90ec9" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 26255360, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.7.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.7.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.layers.8.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + }, + { + "name": "model.layers.8.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26245120 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26250240 + } + ], + "md5sum": "8b8f71ca21ac05fa2c084004ad71d41c" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 2560, + 6912 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "dc215c9785e706891a8d6492cbcd30b4" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 13824, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2fd6b48899e9ff1b7267d56d499c68cf" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.weight", + "shape": [ + 7680, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "17eb3a7283b3d7cadcec708d418da2e0" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 26245120, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13107200 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13112320 + }, + { + "name": "model.layers.9.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13117440 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 13122560 + }, + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 2560, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 13127680 + }, + { + "name": "model.norm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26234880 + }, + { + "name": "model.norm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26240000 + } + ], + "md5sum": "b35dd5d9bf9f6ecf239789e997be3407" + } + ] +} \ No newline at end of file