{ "metadata": { "ParamSize": 260, "ParamBytes": 5590886400.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 257556480, "records": [ { "name": "lm_head.weight", "shape": [ 50304, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 257556480, "byteOffset": 0 } ], "md5sum": "3fe5bd4a4f9f2a4eaf2f66d00d66f6cd" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 257556480, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 50304, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 257556480, "byteOffset": 0 } ], "md5sum": "efecdd96f440bdec0e9be5ccf8c8164c" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "71be470d9b114197ec0ca56fab78083b" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3fe568c7389b479b609328ca89bfecf2" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "a1226dfeb64b9481daee061324dc381e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2f197a6ce40998eb5474131ded168adb" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bb6f995175bfed48d0464e0a9c0bbcb7" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "f0776e80d3a83241b07bdc3555411b64" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "1974260cd01d9f272b191d3c1e01345d" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "55c4431ab7393cda7aea61d49c89b4fb" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "52744abe66a1554f1335c411add0c1a9" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 26275840, "records": [ { "name": "model.layers.0.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 5120 }, { "name": "model.layers.0.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10240 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15360 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 20480 }, { "name": "model.layers.1.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "model.layers.1.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13143040 }, { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13148160 }, { "name": "model.layers.10.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26255360 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26260480 }, { "name": "model.layers.10.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26265600 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26270720 } ], "md5sum": "94a3babeb9c835014b02a24264133900" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "810e1a6020f421b3f1d5bbd8eebdf974" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4407252f8a3449bb91dd69a7038ad262" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "33a9f9235aed6270b50c3d2523ef95a6" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "758b9b43c35de5be520404cf05de20a4" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "be885d395d86d4170aa763d672429508" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "e25426cfa0305a8210071fbe98859931" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.11.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.12.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.12.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "92d6af18eea838627920f2a045480205" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "faefea1a12bbdc090755c8af574a1ad0" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "40ffc41479d59c87d139a3470dc1ea95" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "2c3c44a16b485a15ca9a504770e6cabf" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "deb7301bfbb90740f115f446bdc04e9d" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4a6e0586d024d0b03456d97c74cb62cf" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "f1f83b10eb6d6c17454245210e8c399f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.13.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.14.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.14.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "db91bb3f779cb2d336c1d8ec179b929b" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b2340442f562a0bd1f1d344e3079323e" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "287cce23fdf23b96fb1fa99f698b6c26" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "0978069f8da4b77f4cf21a2ab7476e97" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "349d5bb4db6fe9bf12cbd9a32332fb54" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e47da433611d265386d834f590b196f5" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "c8b2296a5a6c88bef3ebab07312f9c60" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.15.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.16.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.16.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "ef170619b82a5fc45d68300661ae7baa" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "5cd0395e7241e8b79c2070f45af38c83" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "592f38cc1adad4e18ae56f4e153f00fc" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b6097f02cf6ab113ff590d125e99770e" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "6f943240804d977c2b0320ccaefaad40" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "02bc28913e07829457e3800088d5cf33" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "105b18d625284662101b1a5bd3d5c852" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.17.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.18.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.18.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "4e859b519940019435f8a998201b0faf" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "639a99d7a62ca7a829e487dd56745c14" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "dc92e7805f350afbe4f465f373e1cf77" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "72b27f22195bc7f63a4ccc20c2703df3" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "1ab73e63b63a718d9bdaade10798accf" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9816dd0d1e94dfb11a54b630d32c233c" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ec71fb8a799cda30c8ce9650ea109ada" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.19.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.2.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.2.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "83fdaa708bb3dd9b025a8ba6b6e361c7" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "72ee7902e25bd0204dd6e5a4520a2bdb" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "68a5536ad8686de85ec406676b80898f" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "af5393b4eb9a3e8daaddf809fcbe2295" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8702bfb09cf196f0d5b3252a4f09cf57" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "df5b96a84f3a9c18b9e30513bceeb625" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "83c1cf274d68d7a9aa08688e303115c5" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.20.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.21.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.21.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "c3b8f636c85bc2a910c3f037887b15b2" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "6b6960945be3afa236db3b4127d3222f" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ca8d85f1e79044b1b9a2659a45015273" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b7ad9a2a539b5f24d4c9d788f01ccbc8" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8ca2a7663b9a252e4dc8bf9ef55f4a7f" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e464fa88c63bcded773c0303fee4f324" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "13c500e5c03a7ba9d0a443c8aa11fd46" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.22.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.23.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.23.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "1a8d4a29106236515980bff6de849064" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9102cc6f709ec133f4bc59e73b178e01" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bb0042e58f92a354109a76640fc276ac" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "157a47b48ae2a67344056bdc946c3f1a" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "11eef381c0e39e0ab7ccdb3e8b95e9ce" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e39569881ebc677c99d53a24859f39c3" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "2d7ae1daeacaccb9cb219ac43d8cf2ab" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.24.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.25.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.25.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "f96f3ddc4f2cba0e2fdb7b6fc3fb15e7" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9bd487194084cf2c29bce22216119fd5" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "31c70f5d3b143a8d92d912ed15be4031" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "1a195eb40893265026028b6b59e00707" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "bf6ef05bf824ec62d4e3bfeff70cc1f4" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "065aef07386c786d14898421ccd045a4" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "530cb6cffbb73b22dcbe80970cb5f70d" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.26.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.27.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.27.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "156e83b98ddcd2b973a7acf86f111359" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8ea4b3cf13aed5ef72f8a5f06e27ab38" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4b5d84e1d62b8af239a4b81b49f2077b" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "a236affe43869798e15917de7115f26a" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "90b66a559afa774ffa065010fb844791" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "310c1aa8972e995010e7b4fd9bb566b6" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "4ba6e45669d26ba44680c9d3f09ade3d" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.28.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.29.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.29.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "e2a6068a4f0e53b5e5bb79fdeee4fa89" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "c20779e64b9c917a9c72301e258c24a3" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ce801d0695e8ced72de0742bf401a38b" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "8eb23c0c02032bf001bbdf619189543a" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "1ea4b92122baaea1cffeb56e4895001d" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "681a5cfd6a06e10613e3e475b2a4ac3f" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "cab1a01c061932e3a9502c828f5ca264" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.3.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.30.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.30.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "e3a2dd026b4b9098a09b18ddf8c0e9b5" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "01f46d485e23daa18d1fbdcb8b665000" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "953efb8803b9808aa6145165d4c32f64" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ac7d57bb9da0334d7c0e81e2f0324dc7" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "fd0855613114ee04ffb7f899b2181e32" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e900a53824df53faab5e4da3e611ce26" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "3e1bbd34897b6ed08b3419c8dfec78fd" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.31.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.4.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.4.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "83b1ce06070efb9ed80f8f32b36e19c6" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "afa45f36d1f28f07245316cbbbf01122" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "58356763043ba228edf906a84d0b31d6" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "0a5cec9a17c762bb340198743c8913e3" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "f53ea634720e9e83dca397d71c16c2e3" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1fbc191907f4cfadf7f50045a9d9087c" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "5d6686cb230657d72e1abfab21892afc" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.5.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.6.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.6.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "d62934d2e3fc5eb80995a93b39bdb9a4" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "e19ded8005f3d5bd827f316da9834c66" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e06bec900e03b16732cfe24ba383fcd9" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "f8445d1f43562d6443bef65865b8e985" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "ceda8635165632304d055599d976e7e9" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "884b876299824ec6b06233f60988b2dd" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "d9178a0fd1973b8db4beed3376a90ec9" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.7.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.8.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.8.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "8b8f71ca21ac05fa2c084004ad71d41c" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "dc215c9785e706891a8d6492cbcd30b4" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2fd6b48899e9ff1b7267d56d499c68cf" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "17eb3a7283b3d7cadcec708d418da2e0" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 26245120, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.9.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.norm.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.norm.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 } ], "md5sum": "b35dd5d9bf9f6ecf239789e997be3407" } ] }