{ "metadata": { "ParamSize": 885, "ParamBytes": 40900313088.0, "BitsPerParam": 4.157646319274502 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 622854144, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 622854144, "byteOffset": 0 } ], "md5sum": "2f8a3672ab7cc8e433de36e1f192d3ee" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 77856768, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 77856768, "byteOffset": 0 } ], "md5sum": "77b57102e7539c441b60cf59d2514e20" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d64e72517cc4fb5a743b81e596f3498a" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "cb968c086d88e025ee1302911ba24066" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "7f82797d4537782d5a4377fbaa09eb57" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 622854144, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 622854144, "byteOffset": 0 } ], "md5sum": "53e3d0004a69bffaa8da854cf98f9977" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 77856768, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 77856768, "byteOffset": 0 } ], "md5sum": "831e09c62211879b07e3d618c0073aa7" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "91b2c9af1d2ebc69656227f5958c82d5" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "415dbfcfad792f80410ac065e9d9752b" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "82962f312b607593130f16bf7d8601fb" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "94c947dfbe652f98a5422307cfdfe563" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "f23959aab7852eaa57579ca91f8bb28c" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 24662016, "records": [ { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 0 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 16384 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15155200 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15171584 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15187968 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15208448 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20451328 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24645632 } ], "md5sum": "26b224dc22b53dcf13fefdda046b0d09" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5c4610d182bcb4f23691c1e03a3fd446" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "22e57c1acdf163f7fee7067e2b82c50a" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "c57cd52fe46900e38d1f61a151e5ffde" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b6b8e3a33346ebe077fd0f2b9543888b" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 30347264, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15155200 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 15171584 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30310400 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 30326784 } ], "md5sum": "5676a9f745d5d86f1a05e0b095fbea48" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a082bbb7d0ebc82ba407c5f35317b089" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "08f1e8ad8df5f8243107f7e14a6c2497" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "1436f3f47bbfe31e17c0dd6dd7381954" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "dd4ac404e29cf5cb5dbe9a03f4ca696d" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "96058e3cbcf026e5fdb367d597dbb8ae" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6509f402d96bedda6c9538337c0f587c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29872128, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 9453568 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24592384 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24608768 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24629248 } ], "md5sum": "74edbb75879b061830bf9fe542b049b0" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0cbb40adf30e066aaa0917b418af5ac1" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "5c417fa53ba369072beab5f8756e1161" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "93d183986cf3f68c4a6d7a9e73491abd" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7339dcc57269f056653ce2bc0f9dfac2" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "81eb73d8f422861928ccd028e0110647" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0de082783c7f3b746ca03cd8605215ae" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 28839936, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 19365888 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 19386368 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24629248 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28823552 } ], "md5sum": "324946fddfa35486688c1191064ef58d" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "b9c07636ded1a629c06114b4d4c19697" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ba893847cd7e01b271df4fc63dae3b71" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "04eb1cfe7aea17ccced76bbd52872f0b" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a965086a064b0032b0d5778bbabdaa93" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "6b432a0a6ebda9244163c2780e237758" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "636a6d6585f9d0ed311eccd676519f6b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "e2f074b04163d1a4918f438b00c6ee0d" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "a2e607b3088ac83176b3a29e0fb3a66b" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "5750423e6464274708776aff0a3103c0" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "14553b18d62011142afee8a2cdabf638" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 30347264, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15155200 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 15171584 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30310400 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 30326784 } ], "md5sum": "af3d1e8ff1e9ed6658b123843d7c3530" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a174af48c1399b495282a37143795fd6" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "900e6e24dba2d9582e2a450a266ae8ce" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "0a596a1d79708c68cc3da9ac8e393cb4" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "86c3000637673bd08769ed27a27a58b8" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b70a7fcdc7bc5db15ea0e23b3a2c9799" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "79c5e5e0096ee3b2226ce143509b044a" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 29872128, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 9453568 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24592384 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24608768 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24629248 } ], "md5sum": "f002da3e36563b8c26fc798eaf989d01" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b00e9cc27bd5bd81a48f1f4262b4f3eb" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b0fc9dc9365ae20fc8d1f38e0370f2b6" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "4a9a41f32de238d5a4390294113fd2b8" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "069c10167ff97ad4c16f73f744f7407f" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "922a22388db55893b949127e0a8d69ab" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "95de3d8cbbe343f146efd48ae9529283" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 28839936, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4194304 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 4214784 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 9457664 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13651968 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 13668352 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28807168 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28823552 } ], "md5sum": "6cd2786b43aef81bb7687fdb40b8e195" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "160fb95202e61526f8af0dbf5a1c5259" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0c92c73adc24c9232147391f9c7bb64c" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "43de349f22f9ca1cf129653a9ac1bbfb" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b516a5f51df4797e367cb205285e5f3e" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "707762343c44569703b9b96fdfc1d9a0" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "b648dea97a8aa7742972c0afeeb54f31" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9cc57f34aa47cfee623d6cd74cc72ace" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "234692c7e226f0d83e22177d40796ef3" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "fc8a0d25e7e4f1c3db46b03e35e067ac" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "e5a2055170e6cb53f2f2ed8f94f108fb" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "db61e774b8e45b365e9114121cd7e639" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "1cb470986437a7394f9f3b7e4b8fd0f3" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "b08888b5ebb66c833f0922f976ec99c1" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "f34a86d8e2e8ae37f2789f5132429821" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1c47d42787be4eeb6a88091b2b8edb78" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1e2e149cb04712c973e8a7030fdacd10" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d9522a6f6bc4599e64123f92d9c76b46" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "f6bf14900cba477183e6362eb4b44482" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "17c9a8ab9b73363a92bebbf3aba9b8fd" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e474ce06e13eb4bd86fbc4349720cb9d" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "d27d07d2eb44b0248b17d58b9196d6c7" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "44e502cfa2cc86ffe3176cca07ede577" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "15066a291ea256f413f112eecb24470e" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "4815b6bee858cb152eef66d6023f87f8" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "5effe98ba80e5f2ad8790c8416b85285" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "5c3c039f0570562f774cbc192acc8e2e" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a2a0c39b13c75b917f8fcc518ad66903" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4e200618d7230aab50695f59436d06aa" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "db76503f52516822cf7694b14725a361" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "d4a81ebf5bad29e8df018a56d4454a4c" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "bb23fe109f2cca75b9b261f21acf1124" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ab32f0a1a367bf129872e0b3551272ac" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "70762066f1d254a28b924364e6061d5b" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "85a6a80daeef1854eddc5a086d31ccf8" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "6543fe743be40bd2f8f1e5b74ccea0cb" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "9e7ee248b7f6a7d82caaef9d454a6336" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ceeefed13e1fa9c4633332d11a99b1e4" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "317308944e4ce8c8658e06a5267e4e85" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "31b06ce8189dd85fd384d2f91202d0c6" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "21a4409978eeaa414cc706240f487b80" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "45991d5abff3fe99b63b8345489bf982" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "a710cf29e1417261e420e3238ee608f1" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "1627283f532e5e4438d1518d3b668bd0" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "fc42c462299f903046e9c37571abf904" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e3e59a1a8dd37a903b2e2d24a8fc1e20" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9b410840b2ca4577065d573b526dabc5" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b4e62b510a64e5cc8c658a6e9c9786a0" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dd24c6b35b58168c667e37b243c616c3" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "67b9eb87b0b42cee0c1d7e0c22821721" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "ef6b34f7d221210cd3f02c8ab0730944" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6b27592bb6069ea9afe161676b23ffbd" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "7d0bb379df610e9085e4303f323489fb" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "ccc6e487585f0e1cc73e6d371d5b3f35" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "a8d8368365868e97e4bcba2b3581c676" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "b59bf0f6c5cc5bf25929de8ab8dae09e" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "83dd56a77ea74a43ccbfb08203e90496" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "61f46a38a4274672b2e8129831c9f6bf" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d05e2452942cbab0e33463de5a462ffe" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "262154b988c12978297d3b255084fa14" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0f25cf6a1e95b4af9c512d0d46b23fe9" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3146803e0394abbcd3d3c6a5f27716c2" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "08cbcc00ce3b222eedf94dd1616f971e" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "ff76a45de5598c2a3da9f76804939512" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "865b2ccbb2afc24864ff16f19dbc9cb6" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1c56aa5377761508e6625d5c3987c816" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "409e8ea170763ca15b6991257bad5f79" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "b6b78d1786982dc295d4958887585325" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "63177d909543be674ea5bd441ae796dc" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7c3517bf93a052147411a17542eb8cf2" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "483c6ac829daeb1cc9740290ea2caa1a" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "3a25b6382926452683b8710e149bc07c" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8f5f36a8673eb5c04ee9d893066b10d8" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "09aa01dcadd2149b3c157f60990d288b" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1ffdb2155ef8d884cb03b30ff96e0784" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "dd64d7ef69ca1087ac094db738d7ae10" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0e082228ab62be94797223446b19bedf" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "7e81e0fd49e7cf2d48d526dbc5e56157" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "71d08916109ea623035f3813d5a19249" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "846f72a7e781c5d293aab94a6616bcce" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ed6f3da26f1691fff911cff68922ebff" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "676d5a05a99b24ad9c24ddc497c5cce5" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7cdb40cd40712e28dedceacd58fd9a26" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "ec398f6181f3504eacc57231829b648b" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "b9c3ae1b5777c91bc46e2c9552857e7f" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "a2b641c95496e33dced68d9bae239083" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "33ba89df549e891019627165a7406690" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0ae1c06ed3e00ef04b84d8764daf1d7e" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "64b57a9e6f9a84f93a3861cc15b3c17f" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "881f9c4544e864135c756622004db44f" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "a322b1fc6065725c9195cc2d43f36805" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "257a277414497a1aa47bca6a4de908cd" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "1f28c4a7df95627a70caa0a9f7b7090a" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3a4a41d11285f38aca783d6ea60bd77d" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2a31c7bdab10e586a14d5aa21a48cddb" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "08481386b8ca5cdc916fb8264c13d8fd" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "f9d751c16a963a30b45c0e82f3886688" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d9172d9da418b683da29cf7d8246c0a7" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "fe63caf69dbd25b97db7044da811c8d0" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8aaee9efa2bb95ae096aa1ae2557d5d4" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "670d1a7243911831c8da8792b7f58afd" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c5c4e5451ed2357ad9c12280b10ae3fe" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "df9b0348572c7c545a666c39be16dab6" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "97d4e9c1b4b28f9f009be14986ffa050" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "49ce1ca41c447a823475ff1d84281de0" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "3dbb0c7cf4c59c125b6ff87a14de5b5d" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "59b3b39bc34e6de15ea586583b6df4d9" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2de4e5abfb73fa4317810fc097a13c01" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "425f6d846b5e7f34e20a6512c5cf021d" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3df370fed8f427c071d2193df99d577f" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 28839936, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 19365888 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 19386368 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24629248 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28823552 } ], "md5sum": "4f445f04e568eed445b758734ed64d84" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "79afc5d95d734f5c0f0c2d9819c2b7a2" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "2eabe83fd968e9753f7a1759484db121" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a17563a1643861d80f5dad5b652f816a" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b14c749675d4c709b4d50a546bb49bed" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1eab69fe809f08dc60f3484ee130644e" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f3ace20daa413828687289e06b143fae" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "12662ec45c5172285c0ed5c772382ab2" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "444f0dd770fb6b0e90c9492ab1818144" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "366d22ce0877ab40260072f200c9e6e4" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "45b0ed7aabc0d217fdf57324e7655877" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d2b2e98797c98c652a3366200c5478e0" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "bf6d38345d95620b824edcf9393eeb24" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6f1fd0fbde689acd57fb6042229864b2" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d42e2bd89fea706ba7b91c6cce489b23" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7ad48b43b9bbbe36363c108ee41a5714" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "46fc40297b6d4ab71a2982edb75fce29" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ecb5420b030b7c1f751c21803dac0855" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "1770b3c7a316e1ed18ef7b2583785a9c" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1f4654158d3c57f24335e4cbde42c584" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "523e927e9173ce3cbfe7cc45363a6536" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "35ffabae887de0b65986e30dd925b497" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "cadbf190f744621e094b012490d4f535" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "ed0d2a6e4eac2ad244e2757d7f0f49a9" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "5861fe8761258a73b11979e77758689b" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "980c3c941074bf6843a988e7669366a3" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "a0a256ae7b28c32fc7b094846db00f4a" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "06ff20586ca4a13c11e6bd3016010dd4" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "89e20162f4ddff2e21ac67d0ad13198e" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "4b1b9334987501cf099bd79c16c72372" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "cdb1d04bf0f465d41b16c3e52ab56e79" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bd635827fd6f3f21c3889b2681f0bb2e" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ce705b4fdd23805f320cd6e923574c0c" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "322f2722fc8c9ab9d500a9eec83dcca0" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "66a5299225898109ed12e9e60b464e22" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "cc7ab9fdbb3728004dfad2a2f6da966c" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "5aed39d0d8de984ee0fe0a206acc64c7" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "5a20581b759430fde5014f442c75c3b5" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "6f395db9341c44ebf94604aace6fa67b" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "93e8373bbe22a61dbfcd5527bab490f2" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6d81616dfa031fffffd48d1eeac9fda4" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "636ff25ab2b4f4b7a2a430742bc1d434" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "c6eda978d4b415f4794105fd3eee294c" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "bc6fa509dea70cb5f4f0b407b7ea7141" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "f39533cf489a5a6bf3873efb2757f5fd" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6daea4522638d34d9d97369ce7834a7d" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "aa9b621d25d6119973d5298f77658605" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "72c635a0155b71835e7014b246d6c874" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "673f008a4d702b07023402a19af37430" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "199970431f1e6195acfd867393a99429" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ddce19c530586e2a5b66986cb7a0d9d5" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "27e0a613addb76a2a0b51daebe9e10af" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e8de56ca0f9a1749e8ca9a5b546cb7d9" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "f2fc20bd738baa4b510b23afd8b6da7f" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "ab94c6b935acec2623e745baabc8c352" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "dc5d1b41f82bd4193a8b5e727381ff20" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "793dbc6f9ff4f8eb8a227f624f6e4c34" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e91abaade25ee36ceb0fe6801db662c4" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a7179cdb2dc1496f733ac9010b8791aa" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "be958e4e81379813e4c003665b4a96c9" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bbeab937ed6f60b58a50c28cfea26205" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "957a94d31416d10ab00f10540960ac5a" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "e4cd651210acd9d7ea44ac3645b30eff" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e4f0eeb4f3db416b37df2ce35457e330" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "5ca4cbf5c29ca7bd649a2a831778bd16" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "b85e25b9b51c3abd2c65859dcb12a897" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "8829f387f8ed0c2dbf199df78636cc7b" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "9803153c05063000d600fd98a6c5652a" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "dcd7a766781cfed1f709b3560bd7aabd" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1e8ae0b39dd207b3652bac6edb54b0e5" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "02a5ce870a5c0eaa0eaddde8d8d666a2" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "81e8203db5acb7a05c2c8598e0eba942" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "3a4410beb425eefc6bc0f79230fd966c" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8c7f34439c4076f3263cc696b8d08a6b" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d83ddeaf1e90b9ce0b7bf7331bbc3425" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "893845464acdfa2831edc56ef190327f" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1c82a837c24e848566a3774455c57605" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3d3a1c7c5b4078ed609bdfef0cb8db83" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "bb8de8d371ebc957dc666e38a689691b" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "39e40c8d348c61fc2d9e11d2453b7072" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "70531661185135dda2e4c6281071f975" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5c6db7ac8d94f56ea816aded4a69c2e2" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4053a0167df5539983dc993125fd903b" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "cfe28f99950e1f9e6e9e7c0120ff220a" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "7304cddd6f2549b652f0386213e8db09" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "aa5c2563ab6ce326aaec34a4a197549e" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "458bbdc1ad40ae963093cc1b29e52627" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "0db0118e4ebb5c32d3a9944cde14ec31" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "12b99a402ad237a632f1f9a12b710561" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "f13bf1a4a2c9842c65e13de494a26b4e" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "5cc6f2dcc30adeb1969beb3b0780c4b1" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "dbdf08b7058856d3835bd151e5155253" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e81e9fbd8da7d46d078aeac61a13f08c" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9f4b43468dd8d21b4465cb89abe80347" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "887abe843d9458be0394331da9c8d363" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "a97c8a49e4eea4f7637354ca14848dd0" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "0b6418b770b711f41bbd5d8860499631" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "2504681e588bddf9690d47bff04eecfb" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8fb2be7b9c99ef847f9c4cde6cd351a6" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c7e37bdb93e4df9fff275ca001bbf383" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "682d323f22a3a44d89c6abff0e4271cb" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "2f1449c7d26995f48d7aa5a3dc8874bb" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "e268b0f0720703f2be2933761c4efe1a" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ed325044293067e0da54d590dfef71a3" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "6472c6dc45a4a4009b868313243c72f0" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "82620ac454e23d382358e0012dea5af8" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4bf9c208f38fb7644525eef2f70edf6a" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "7d3edcc22483c0ab68d0891e11e4558b" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "6dc7fb81ad883ff7f33c3cf1edca9d53" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "2b7492011d415eb2c9a68c45eb6b7a58" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e71f82b437fe24ef9a92741d693ae661" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "61d6ab534d8cdce587d2d0eaa61fd4e0" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a74570618926f159e9a14443f22acfbd" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "baafea3f032d484f3450150f623bcc03" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4947b2b1791f2e02eb5d73deb5f828b9" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "5456fb55aa5dcec34f22a739425f1bce" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d9e98ccece990a9c60f135f1bfebfc3c" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "76e3dcc435ad3dc88b74b80fdf2d9fdd" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "412469dafed735aedf9d81e3137a5db8" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "e894ad96da0508ba5b375eaf3a725906" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "9a3dc9403fff17b23a265a0488a69969" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "dfd70d19b6efaa9f6cbc3d6cbab3586d" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "f1819346d582a27ab453ab2345d5dbfe" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cc84f05f914f28e731b11a2d1857efc6" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8669229944f212af6c7a57e686292286" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "4a8e36ecc6a48a4a28dcb014422e7b41" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "58ac484f6067968fb533b8ac4b6dacdf" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "35442fd12568fff47f66d72deeac0b83" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4089828c05082fbb0b4e9b94abb92827" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "004ab15a7d2d290a1338be93f720aa35" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "c1ea99f205341ed4f9539150338c8205" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6d2e1b7c1007e9cdd301493b0c6e8114" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "dca08ed81a07a9eb70c92e0610b94da1" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "abf02ad634bfca0e86d3c8c807dbaa40" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "c7d9a7c88b41cc21a4be21faa8d90399" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "08dd3bbf936fa5fdd8994d01ec89d46a" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d5be546346ebda8dfddbec2d38631f94" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "98823b8cd5acef1cf2835e3a9b2ce819" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "082faef96eb7f1d2eeb49b1440161344" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "39b61062275673bfbe3b371ec02147ed" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "b0c76d60374188cd77c012307b5609aa" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dec628d72f68852ce448c8071337a791" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "59d5bf25442c2b87957656d46e6143b9" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "16095e4ef8bc916b6df464fc8bc21bec" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "b04bd6b8edd9bb4ec2d87b9c28dc2f38" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "8237cfb3fd6ed0ddf37ddfef6336bc47" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "8ac306ffb975b890336e6b31360d232c" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6a71b1c3f101a07062e1a026648e5f8b" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "2f87fa83db49bb2daed18f4e20a6e6b1" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "99e7ff21a160c7396f8f3f49097d5bb2" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "01e6c6ae5be10511d7ae836f4b823601" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "57ce8e27766e49e49a3f756cafccdb80" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "3b0a67b627f41b636204a109cd71af52" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e1675b5669e3ca2e62c862ea18c51df1" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4e4670545c5b515ce27447afe50633fe" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "af4c9afbe8f0bee1d8dcb495e9c3d741" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "13623aca9721aaa537c7fdb06f223484" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "6675fac4186f6db14053b3e09787eff8" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "612c1082add037fa555cec64b94c13e0" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "de07b013b36564474ff1b20fb42d3cd0" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "72e4c2da96f7a966c6d245a41bddbd7f" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e8bfe659d1be558100306724f09e1f60" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0ba7a8135afa19b0e96dafe78bd3145c" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3311af9e82aa635be2dfbd1c67c37113" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "17335e7c2de591d63fc44f6350ade9f9" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "c756d02ed68848af847c6e67913cf914" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "9e384fc6174f245315f37481e035816f" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "73387015664f97bb52610a0ae1376f1e" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1aba3e3c8ddc27dfcf5bc47710968c49" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1cb3b7f3489befa2dfb7decbda8ecd9b" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "7ac48986fe87010eb3ea131576f4083d" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6a91c4a7724a8af12f85149ca07f74c3" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "539bcadf0ad983b240f28f457a7c0f30" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8004bdbada0ceb809a481d5ef402d385" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "399f308541ee0569c71898209e419855" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1a3498eaeaefbdb460c6d1fd52bf43a1" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "66805921f3a88fffb5a652407b7143e1" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d0de88e79b9251ffb8e7a8c1dde57321" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "14fdfc9a0b0f28ca5b99e236246c9ece" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "384359d435b747f1ad86a0f376437f05" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "91d71bf3f95da41eb7261d92be3a44ba" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e4703de8018d889569eda0f9a847eb7b" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ad051dcc8c31372d5f6250ba497c7521" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "9d8b5f43b5984d75b7b4ded1fbfbbd7d" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "b8756756e044a8a736c195f113256515" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "3204dbf46cff779836596dc0005afedc" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ac3b1a428631a3f4132664ad844dbb2a" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1fc4ba9906daecb92c1fc2a5902ee11d" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "c3a12d0a5350670fffcb4155fae0ac8f" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "b3e967833f6769d590464528425589b0" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "f994830049ba969c561df8d33953c228" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "603764e0763f1d017e586e4bee82c0bc" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3585a253cf9542efbfb3319f5c186658" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "dae2949a751b58d0c1e0bda6bcffbc70" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "7a283ad2697d20842de014195222774d" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "378d5756024cf10ab1811f3fd1d94940" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a52599bdb961e9ec0d86476609b24492" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "500869f6882003d889855528e8221e86" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "c8e0f0255b42f9832cf1e495e75468be" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "2bc7b58624182fa4f5e98a35c06f96db" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "75684dfed5dc31906da5079007c2986d" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d1b97c20714f3de1efbcea36011bf360" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "860b5aceb17f1958e25eab128ba7505b" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3852f22941de0850145970ce589c92bc" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8cd93b79ebd850fe025c03f23b6c40d4" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "861e503b62efd764cafd5775aa0023b2" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "9e6ca3425d6928dc4013adcf80fe848d" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c051366cf7c12b459a2eec94c4ccd364" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ba9c29d8ccb40366b10638bc2e9b173c" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "2f6485a22443c08b4e6c9d54b0877cac" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "2b9ac6ead69f9cbb34d84a01a47a3944" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "f291cb32cd2ad0229c76f879321429a3" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "8a72483fdedb148cd73034aa08bb4b4f" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "1366cd66c642bd7f48db7de9ebabde67" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "898eddbbe20f982069438ec7f210c4ab" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f32acb7eeb015ab1b8698c6b37bc23e3" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "07c182260e47ccb4fa675cace2fb06ae" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "25e0598dedc7b596a71f389c7ef7a706" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.64.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.64.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "9e5e57b9dd7e5486605f686cc7adbc04" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "8102d113eef5a76617caf66b20fc0b5d" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "79c25c2e5848570e1280fbac1207d9fb" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8c2fd79c1a0daefcd1e1fcedef0d4dda" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "19f037871b43b9dbeefe0d882c5ce9a5" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "c23b1feb32573c54050bece49acd9948" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.65.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.65.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "0e8f43644a3752c38b0cff2170fdcd54" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "c173abb2975eb81dad474fdadc2dbfd5" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ee2b73d543b168f88461a67ea456b3e7" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8d5a3ee7a52f446baf5e14137e290605" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cfcfe4265f350a68b28331dbebd4a0dd" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "9e66778d86b0362c0fc14d9cc9974a2a" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.66.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.66.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "ddf24bfa76a5c960480da31d0db7db59" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "1821dab0bc646191f18298d81279fb8f" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0e67adb32b1379ab5ef248ce84fa26bc" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c61e983c17d98c47b09df75d7d0b89d8" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ade972a27359d4fa55b56f538a90b689" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ce35c889efb3635e82199b5c0b546036" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "758609781951e9f80e29d032f101f555" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.67.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.67.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.68.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.68.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "f2e0af3a02eefc5136c7b8bf871ba21c" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "12cb97b1183632cb3beedaab30a9b249" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "37048a5faa7b63d7d4bc8feff95fa4e9" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "9a0e10c36669025284f43157d8cc8cd2" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "6d65a9e28bc037b1ba85776558cda71a" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "c9f890bcd2340716d4df1061acc3970e" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "08a4cd3d5e03462e13ca49dde3bd12cb" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "7fac27774074cf07600c68646532c62e" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "642b35f308e8ef2f2555973014d2e8bc" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "54d3cd77cec331d82831aca4b55ed3a8" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e9711e73a346a3563696e4b547c4c690" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "f3815fdf4dfd3b7a2338e23deef3431c" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "85d565823923e6726a2ab7ce09e80b33" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "519448cea1e4f94bcd731ff0bb3f3a82" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.69.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.69.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.70.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.70.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "d3579b98fd56e09cae96f7ddfeae448d" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "69216bd81bdaf832fe5c553e8053ed00" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "31428ab3b76ab1b47334730878a6c1bd" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "b1c86fa8bf11a058b43c84e6bf454385" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e281af21f49d7491e400d8c7441a1d5f" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "4f0e476ca223f7f7b2fd3d34d6667272" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6dffa03b67517c95728f170cb282af45" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a7440242c907a61d2b1e0b4140342ef6" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "56f5cc333803fdb0525657fcd964a5e5" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e781cff5fbfff24d8e17eb46c0e1cd58" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c6a8a2629099fe8f130caaf41edb3169" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "be93407f1b986767b57215e11e38a90d" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "0c4594ad123b96aa21f36f34a17b6c9f" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "2d266fd49934dbb26bdb2fa61164f626" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "8f850db3533b29e3776bf2143c8f2927" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "feb8bf695421fbeea29b1bb2d83aeacd" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "b473aff6d743be92c3ae468947188d34" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8f867f3374fb2aa629c2394c281f0437" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "511d82073473bc16262816d011782d78" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7f3ea13087408fa1276248e68429348c" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "82302060fde6a0c1ecb6c90e0a17f2ed" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "7bf74af1247a5512a1796ce29a7f6cd5" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "365f4194972a7985b69121404f858468" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1c1b2426fac7da7353f31469015e257c" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.71.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.71.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.72.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.72.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "a3e55950eaf59b46267d242c4dcf2021" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "bd6cc29f07c6c547ad7e490be7308d20" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "7cd1683fe424bb788e6aa26250d1f74f" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "af50be7c5f704bfbc984ee5fc73e5761" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d0d417fc42b0b60b98020eff921d25a8" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ca42b8a35e229e647e0e9ac173028ccc" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cccd036fa4d0962f9bc421011421b063" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a535228bc09eb9a13d445c9db46ad1f3" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "2ee9410791d29943422ede710152a9d6" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.73.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.73.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "2a9226e5a3653324967fe85429ba4329" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "a665b0d3faaf7f27de9f410da91d2a36" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e7000c1e6c2aa0aa20fd5e641dc0e9c7" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f6317ed839a06e02e67fd2fc76ff676d" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "df6b8ac824ab692ce2da36c809e83f68" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "97046d872b3aadb51abe924866ccdf97" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.74.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.74.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "c01642b435f6779243df70d43d95ee36" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "179fb8fa72ddb6a2d8c7ae9ac74546cc" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "577509f422846b37322412f6ff0861dd" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ef01adf4e428f2f4a4d565c9f6de6422" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "619fad262d2b8cbde84067031118042b" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "444e4aac5a215328391f507d5347f051" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.75.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.75.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "fbac8ffc40d02cb0c3daece0ba1e2be0" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "71244c49fc240a3726c8875fdad997ff" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0a2ac62f8a5bcbb79a836ac6a128dc05" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "27e04959714a57ba0da09bf993a7c45b" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1f40752bf6b367ee0b11715539e2cd7a" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c7eeb9ede25da1f2c08eab1eb80f57c9" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6b17b6647ca7ea83d306dd422c259b07" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.76.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.76.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.77.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.77.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "3df8a294be75effc8e38cb087ee065b0" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "f1de750c63fd7fbf568e20567097b04f" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "40ced5f814bc16c779899c6d48ddc4b3" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ae50bd9f83602fb110a22585f599726d" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "4b50ba6e95a20fa72ddceecead177e17" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 19382272, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4210688 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19349504 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 } ], "md5sum": "9721dd0e90f269ed395b823ae2ef8d18" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "fffc18f71faae5177910f84417cbd6ba" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "01c70600cfa286beaa766b86fc954acb" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6da949382437dde33268eeba4535a023" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "75f8942fce96641b953c1844eb736ba7" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3efd2b96df9b8c2162a8c481c29028cb" }, { "dataPath": "params_shard_480.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bb37b28b11f6f5d2cbff130bd2ccf285" }, { "dataPath": "params_shard_481.bin", "format": "raw-shard", "nbytes": 29876224, "records": [ { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.78.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.78.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.79.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24612864 }, { "name": "model.layers.79.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24633344 } ], "md5sum": "930256ddbc0037356c0be0f646c2f7f6" }, { "dataPath": "params_shard_482.bin", "format": "raw-shard", "nbytes": 4194304, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 } ], "md5sum": "bcd19ca778b5296ea020b603cf772f7e" } ] }