{ "metadata": { "ParamSize": 885, "ParamBytes": 45448593408.0, "BitsPerParam": 4.619993414045882 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 622854144, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 622854144, "byteOffset": 0 } ], "md5sum": "0df89de62bdebb3a4473f1629e934d91" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 77856768, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 77856768, "byteOffset": 0 } ], "md5sum": "65eed05318f1d153f4dfd8ba3765006c" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "f8507b8b5c600fbc2a1e3897134eca28" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "134e58ee25f63566783f5562813883ee" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "bb4dd9e8e00d3c529771e16e6dd4f3b4" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 622854144, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 622854144, "byteOffset": 0 } ], "md5sum": "89745fecf3f5e2a0ba1ae2e63554c14d" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 77856768, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 77856768, "byteOffset": 0 } ], "md5sum": "1597765475ecee93f04e3abd0bbfc2ff" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "4e4553caf37649d601c37a1ce9aadb05" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "40eb41e069c3dd6f022c63ff79f95876" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "689a8692fd519543be90be3752edeee9" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2f5ed52c35f33ae2890f03729fcad661" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1ee6306315a6f9d83fe4a99a95ee6cb3" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 24645632, "records": [ { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15155200 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15171584 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15187968 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15208448 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20451328 } ], "md5sum": "7518931c4bcea0a8eb1db83519e62c20" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "91f24c7722ebc5cf2a6cd80a3b393101" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "0025f2e1661c14392bb6fb71875111f2" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "dfb3730774e5c48ede7ca36ec8a686b2" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "034adce4ee1474389d6455cd2c2f0604" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 30330880, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 15155200 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30294016 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 30310400 } ], "md5sum": "f641f10dab033b2318726dbff9678c98" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9a55e0a3c89023aac4a019f78a967e9e" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "2b2bfc76bd9730c490afbd0ca6ef5c50" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "38ba0bc4073e42d2e94ff82e4406dc99" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "60a3774a5c657c9da1927f02964159a0" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "17c6bef8729a18a8a9a5559e557d2d35" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b81b81542336dc5926854e5d038e5715" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29872128, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 9453568 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24592384 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24608768 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24629248 } ], "md5sum": "a16213a5ed49f2900c7be70c16c2e512" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "feaf2bae2e219a0e5bbd94c1fcbc9010" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "793180ceb583fe0b999765096fc23ca8" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "cc94e5218431b8e4bde28ebd33735b10" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bc6df3a491a2b7168c72e53bc576ef64" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d2bc44ff1129d412cc20e6e756264fe7" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "4d3df17f439aa2e883a565d5fdcb560c" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 28856320, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4210688 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4227072 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 19382272 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 19402752 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24645632 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28839936 } ], "md5sum": "3227bdcc8e633d7938893d9798f77d4b" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "20a6b705bde6f85a612efdc135f48f04" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "793279990c094f7292b169aab075a676" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "acb4c88f3e7a44bf232ecb61ebe458fb" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "803c0ecf84b8297e35c82c66023f426e" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3e59f43a35177824dcbe14f7c977dba1" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "7cee83662c95fd71618de66c1a08b015" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "12d529b92353bc394efe5b1309f5dabe" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "f8a72f7835ef7fb719aa43e44df5bd0f" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ac6b855af38826295df2d1674adca662" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9b13d7b5899787780cb5a6e2123c164a" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 30314496, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 15138816 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30277632 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 30294016 } ], "md5sum": "5e8ef952cafb84f7343a92d577525f32" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2669092359bc98c101770244df9a4931" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "97c4ed6666ade2ca184e486ecde19dd9" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "600e710b156cdfc877636a1efef105d2" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "b990b3c607853e89165fb0838e880a75" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "75cb6e0f248fdeb675c6ff7c8525ac9a" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b092408bfc8b35e4761f0fc4b165f881" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 29872128, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 9453568 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24592384 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24608768 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24629248 } ], "md5sum": "f1ecf596940c6b17971ab13d689b0bd4" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7e892d5db1f91bb507531ac044695ea1" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4ddb2106fc60d4063208282c56d12bbf" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "4b6e0322ac233b486ea73cdfc75b0141" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "8540d3f3f1cb48deed46171d60d5d169" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "07051cfeb3793a39e149e2b30469a47b" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "cbb383ac91bc16002faf0f88998c3f66" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 28839936, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4210688 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4227072 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 4247552 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 9490432 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 13684736 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28823552 } ], "md5sum": "002371e36b2c147304bccc7090a0190e" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "b75d4c6434c911a086911820d2f3ce6c" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "338ed2d928fc8dfda3791f8e628861af" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "12b14cb0a9eccec194f6e9dfc2937fb1" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fe2723fb64f5fb3fb79d3a039a717c49" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "b358cf9cc04034d44313e51a01f798fa" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "25c874e9711cdc49835a437eefeb2f0e" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1f0816b4746c4f87dd7db231398c5f2b" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f7844351dc2ba10518712d1637e15e71" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "8dc6656a6a874fd2f3281c4349e0090e" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3efd126fc6df882ef496f90531afd860" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "8163e5645c2663e441a1b4c320f3b463" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "b6f5c0827b7a3081953e558783a9fc6a" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "160790a27348d18e8b01accc98f39950" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8f477dc37738ef4fa3aa3ef9945197c6" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "283ed257b5ec237e7938e1bd3a41c78d" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a8ab68371ee9b9341d4f02cace8f3271" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6f5e1e5543be17dda7f0410fd97c60b7" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "bb695a08a5693ca93fc2103d14af03f8" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6c94f11513b260493c577409d7d6598b" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "09fbe2d8313f66a5d5bb57a606366c5d" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "a4cfaf3866229d9e16c4604536960282" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "789e6904f5e9045d9884052e7adcb2f1" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "60ea4ceea0785fa0ac79eade33c5436f" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "0bf453cf1a58d3006ba8bc9dea797a59" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "2c49139317ca11a940394d13068d11e0" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "c4f95bb1c392ec72b1570c4427f6fe3d" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0266836c63fe0e4d13233d170a3ee3c4" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0bb3f7dc3316009c14efd1ee5e52179e" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "ae96593eddcf7ce18cdd5233eeb8f8f8" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "71e6ccdfc22d38be88b328f957cdd35f" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "fc57473ecbc7fc9612904155e5842c6f" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ca8673c00cdd58656bd8c4f510122b32" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "38e5fb42b542da0e475d72a2ed259fd4" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "afe4a1a392c0647f7923ffdea13530a7" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "b3a827b5067fd7be63cf68e1a9305ef3" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "005e87a90c966c06cd47a97fa9f3bb85" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "7f994546c04590cb197f185b7e1fef22" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "2d3cae478e3254301285204011cfa4a2" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ea974adc0937591c83190f71bd7145e5" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fc97fb53a14211bb73f24dc0fb7f76cd" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "300e6c443e8fdf3fc9fd065d5a6e1247" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "48e2e9921dc67eed8f47dc5481d2cbf4" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "fc527989c58190b4d25d4148e7c3b8cd" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "4ae9d8616b64d0e96571678656e58023" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4b7ea393a01caf4617091ca16677343f" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "511431d836384c6ff3ab3cc5dc226929" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a7c613c7cde8521fabe5c11fb16b9213" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5f046a4d12be96e3a9e385cc9ea9a5a0" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "b319337c9dc884b1861ed9920ea6610a" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "c9475226da8d6a7bf21784df0ad555d3" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "60ff7102e1d667b74d5b775822b5b9db" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "c30774f94480b5d22bcee797204573a7" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "952a55330c2199497a0b1a041a013bf1" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "53443b948b052f549a52cba8e9dd8b82" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "2b076fbb46b8236cd30386f1f1619fec" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "a788e56d1aa0358c7084bbe0d7afdd61" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c2baceebb97606ac9bf9b3ed15517f28" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "52e00b92638f4cd27ad22c5b680ad41f" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "1639e18abd131ca6737fe5df42207c28" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "4e00d1146a9ae307387018a2fc10b179" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8ca5488c5cba7e94df0b650919604c87" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1a3cfeac02742aec10ca43de61c31c5c" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "f69aa4411253fedffcf03f6245a2ff1d" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "455d02c778a338baa35f243747482aa5" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "96083c37144706ac985d655da0c5d9de" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "97a33acdfcff508679dc4693258a5c92" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "a861bab9038da7c9168875c45d286cc1" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "88191647aa3befbfba614b8abe11df66" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6bfe1f7d1942e1608d6f6d2e20ad4d6f" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ee9fdfae9dc1b14e9e33911e059de089" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "060305b4131f17b7c298d3326a5cc56f" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e22eb24d1b7395fbb719c0732208b9f5" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a9ebde96220d2516f6f9d0a5ce834a08" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "64e69a820d3faadbd79dd4459cc8f359" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "5fc3017683d074719509ad60a22e2a27" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "631b46c76562b105454d086803fd3e89" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "267dbd6f376308f1032c738ea140c778" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "5b120545c6ac593383109b6b4ad20a2e" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "2e00c45219744ea1b9949111a304b552" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "6458ca78da4cbe7566b722d44b308f46" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0b52597647eb5bdf052bd4c8a2e43e57" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "245d255888a68fce9d53593012005f20" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "a6c30f51b94c1ae5d435169e8ea1d7d4" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "c2542e8f541f4cf70d90581611d4da48" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "400f0b37abe02728997ade3fce4d60db" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8222c54fe746eb845c96bd3e0da9bc23" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f169d736b8cda80e1343af8cd5f8df5c" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1bd46f706a7f8400534910bd20a016d0" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "ffdd189e7c1d355956d7ab22eb93cde4" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "36351d8c2299dcbaffcf5ea3b4d7e701" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "2804b1ebd40071499a17e4949057ea70" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "cf972613de167e1938ee5444bf16ca05" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fe8da341bf08ddea6fc08cb7626128fc" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "296b52bcd073dd6ebf762a3aa9953c45" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0f73d7cc86b418f1f1650c0e71b332d1" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "e99091547111a382e8f7db25464eb534" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "87477a4eb1683cb6d93961b1013dc141" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "476c7c9c0cc8ed1856f66c9c7f278e69" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "229ef2b625c6b1e18478737f911676fc" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2c6220224b789e67198fc7893f767ee7" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "aaedf9a8b63dce087759c8ad3eb94c01" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "be71a2c9de665ae4f76e76e19d6b61ff" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "1f4f4ce64a7aa771c8476bcd38a873b0" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "55ec245d69e8b94ddad0f7e4c582963e" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "a2d5a84208ff6dc27d0386bdfe4c9160" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "278dfbceb0d8d9d93ad6c0dc86de7ef7" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9d0dbdea83870660a8a3b1e5f95871fb" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c2330bd31b1368102d34db0d1fea3635" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "6778d3b1f95e53090b34a8b66e251b67" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 28823552, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 19349504 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 19369984 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24612864 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28807168 } ], "md5sum": "da3b151db6fe791a43caa2fbe97c953b" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "53159b085577e7f4d642aeb874e4b32d" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "11fdb2bde8021cf5a49b6e053786425c" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d3771040848ddfd0538bca2256481a7c" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a3631d3b4e4ed1cf8931ca8dfe8066b0" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "97bbee6e5808853a6b45776d5c961343" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b16371cb8b97a80905691c40738a0c60" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "163e6acb80752317f594aeb52c62c0e3" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "9e86e3aa443c4dea827f5405eba30323" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "f516362054c4f89be869e9ff63c9ebb6" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "5a47da44f92859c24d548422249be3ba" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "94874e0a86a5e1ad741367bcb7ab6cb8" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "1c53cc70fbacaa45881175847d7c483f" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "75e25e4bf81b8c780c0b045c5047c69f" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "1dc4d48a7681065f0dbbe20d6893ddd5" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "df7b25406aa424a65be70232c27de371" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0fda7f6a4ca9978b501ff2c0faf6f23f" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ac9a907da9bc0568f48fc1e40347b512" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "568353e097e3ec049b8570add3af60f3" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e47355737b04c23b2421a536220cbd5a" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e025ea2a173a8de4790757bbbf8fa006" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "8014930bbcfb886a5150c4ad3e77987a" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0c5ff6f3346613b9bbdbbd4e62f24ef1" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "7f632fc2dde520ab23d76218c670b8d1" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "53e2502de8838bd36b729e760739c80c" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "53966d294e870dba61c58ae46ce8fb66" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "620ea13875f02342a0796451cf5dbadd" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ac53c2f710cbe4bbc7f56a5f7066f7fb" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "054137428043098a2908948586ed1038" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "13e144a981d653e7181be2f5d29f12da" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "bf6dde4763312d1bd65678d8f64ef56c" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a5ec79b183edc89abbb692a6224de4d0" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a752be042097da3bd6b334f0045fbd0c" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "a890295c6a75698206f702e44d08866b" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0204016fb1aac56413bd5f84133b68a7" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0a3a447a9f18056695100d3d6315434f" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "d9ee59eac7c87734b6019656ec9f9db4" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ad45202dbafdc8c7b2bf24f293245b8b" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "6068960e0394abbf3eebd3dfa709a9f9" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c8a4010f44232877021a8d957d078049" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f378be4343075ddf4ea228d485ffc08c" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1ffc71e813b120704d5634e48b9a59b9" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "384a9a5ce9425ee52063e9a9cf12db0b" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "baad6721e08a9fe32f65826d26946b2e" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "2809248bf87ba8bb76842941ea3459b6" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "62b565807dbe94c598fa6d0dbb4f62df" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cec03ceeb7bdc0a0604bd721f01d09d7" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "c116777b978f8a6637787f3dfae9110f" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "1a1af37903249862c7e0a2833d1731e5" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d047c1e02b95fb50e17b1e653086f9d5" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "dbcb7c79c6b9e3b8722a3243027925b2" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b5ee5dec964a75440997140e5bc08c6b" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4facf17bdee514615e8e40c42e088904" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "cd96ddfaa56b25925800c5836abded5f" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "b3f3aa732d603f704bde7d1ca7f13969" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "4bbf2d259db424576cb78e086acfb733" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "a6ea9fe054a353dda1a4ac418067e57f" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "338710a645f0dbec17463ea3ba872ca2" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7c3c04210cd651cf9f45d97aea874108" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7748dd5f643cda9831ecfab085498f9f" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9c738026072e80772c9f505353196cf1" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "28e3b2b43653ac5817fadee4632f8891" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "be6b2fc6b4dc335a153c33e7bfc74891" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "5b9077ab4f89d47a2987a7461a286a5d" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e65ac3fff47a4a35419ab889f5092180" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "304177ccf60712554d11ccf8a2f0e18e" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "553e820223ddd3c07394ddd5bacca940" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "bf5804c894ce6556007691ca70b15725" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "5afd9ce33d0b16885fcda55937805c0f" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1e0234045af11adb468d71c345f614b7" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0b94bc9e754abe05df8c700132f88397" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "fb4723677693e8403e2186853d1d4f45" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "80d89c2ecf9be3d5c743a86e16b94f31" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6a42b420555ea2c9816d25e058c031b0" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6623d68405e7fe5ca80ecebfa6549838" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "f63c222a98a7d85e854da1c7a796c1c0" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "68ae15ef076f6a47efb5508651842ca7" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "bbf07fb564fef622d41b511aabb83e32" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "fb8274cea5c26753334aa959bfbfc740" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "9f77fad2cb8e8bae87ea9d6a3059c601" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "43a71311f3f8a604b7915d3cbe3d167f" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a2ab999f3be6ee3a477b6d50ea629d36" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9c96c0d857768649eaeb748554f09ec9" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "4bee1bcb6d92889b3d8db0683726b1cf" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "90a244e2967868db5ef4a7cdbae339ca" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0314a55d467ee5d465ed61dc7abe79fc" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8fb1dd71554af063c8d42e11f7bc3eed" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "a11dd67d9e2b97fba53225dab9d0f7b5" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1f61a78c4c42a4196fcc9385fb183009" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1def27976a3cc4a3979da9e389c417a1" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "6f889dab3849069f820ff103ce3e44bf" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d3e7eb122cc35309c0f03ed9e7458cf2" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "22123095245b065485c57619288e17fc" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "325106aef95ee2e14c3ccb746733cdb8" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "01c8dc91c30cc94ec675d09ede6e8805" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5811dd8b0d41723aa6a70b867cfde288" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "52e9a4f50b0e0b24ea9bf8729630d3d4" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "0bd1ea62505667f6ca0cc103aada6f8b" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ae7d09249f9a716a5e269884b258ae18" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fd6f0ce2de7072d5815b27b22ec7ee3b" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ebbb717d9142305f51e5a53eb1b9c275" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "9de598d517137a2001c586af731fb6a9" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "f13598b86f2530063da39b12ef298ddb" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "06dae02b975ee9d15cc7a4216048b483" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e40892a4b79ba3ca5c3ae6e660a9cbc0" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5a35e81377227fec7eb7c98264d1855c" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c076d8105fc628dfd7233ef7fecf110f" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "a9444a2bf073da29b0a4d1be5959d31a" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "8923fef605464cf8f66614c0011f796e" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "47347b86dde4fc859ea094b7b0f5d760" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "04f39258203e60432cb7665b90be1991" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0e7b1bfdc7528272bc19de517f969d15" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7678ada56c18e064f6fdab1d74130caa" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "be524adfb3b424f52674b0a117d11a86" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f6011555feee0ba0523ec1b7b8009f05" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "c36fe32e2e962b390901d4e38cd244b9" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "54eac89146c6805eeacda8505b95175b" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "3daa2b45939310808e7bb18ba9f13e9c" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "104ee20875c2ea0836c46b42e910197a" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "12a8b48e225c771021cd513669fdc8aa" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "0ba4ccb616a94ba68ec9fa2d3c63f933" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ba9d1b38aa677bcedde1594fe5e0d5e6" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "54ef9714b2f0bc3742a8e27f0080c661" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0729356854530c571b684e481d1dccd1" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f1f3e509bbff1cf5c3fcb4b55189baf4" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "bfca716098af941ea4c36e6e0a6fb2ec" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "af39ca504d9c47f45b38bdd401b43c1d" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "88496849d66ef6088ee3c0400a342690" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "42743bfd264b813220f5fae21a34c869" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "792b100baf4860aa91339e758b7f6379" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "4c1314fafbe903e0e48497bd4c1d0b9f" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "a65e522faefe427e4b7eaf41e7934ca6" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "57401ce3a482267418a5c03aeb29f3cc" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "07a7abdbda639097be1347805bdd1809" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "d7bd4e245ace0bd41760ff4ceb98df01" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "5b242034dbeaf0bf043df95b4a35296f" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "9845a403411f29677c41fdc66b3e7817" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c4595953ddf9acee2302ffe83bed4ae8" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f9cc70eddcf8936bbd3f0e09d3280fde" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e8f1057dbf86c8f5bf7e4e9aeebe470e" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "530809c31c9b258aea8659e233afd3bd" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "85e9b06c05c82a0df58e408609528625" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "77440a09405a6d3b2e26d72ca906c195" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "cce1d6d41df1da84d626e3752cb7c11d" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "c381ef413750062ef2ebae1114144530" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0b311bddc3419a7cec74b123ef491ef6" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "10c7f8c9f433db8d49f52110ab602c7d" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "1fa58a3b82a4fe4b7f0cfc76476eb0d3" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "be839b716b4ab1ddd2c10f399907551d" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ffbeae1fb433c1b21cd3c61f2fbb18e2" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0b890f671e9d7d54dc82f0ee230add67" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d784bb64d62e79d3a393dbdbbc6cd62a" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "1383dcea76390739062169e6859b8eb5" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "34c1811179d16db2ae173f45d486f4e4" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d4b2a836abc46d6fa49f9bbfc29b15e5" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "b8c658ab1a1639e9336107e8bf3fa182" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "9f7fe86952c302c2a396aa8d45827bf9" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "dc133d505d085071d55abfac4f5b9d2d" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "76d8a29a3bfb4d43b2f33fed88ab1e45" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "0b083fbcb3b66888c0c5f16096df2327" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "495436fffcdbea2943a464b8c9cd4e88" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "db411e134ab9599bafb79d1e5adb411d" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bca8d4a43fce4fce6abf8e41b0a380c3" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "7cbf0a474c33c82e4744c5516ab79a02" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "b325ce2cefe272080ae1b0b5c80608f6" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "eb44d005f28422b6a1c83292fe2f2a36" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "bacff683c1a853a88f07077d17d36c95" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1d19eedc974efa3618948648f47c9f7b" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "68db09412f8b84374b9b9ef23148d831" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "99898979091124fee97f3415c8783df1" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "420a3687e0d31847b36ffd38f11c42e6" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "9a71e354dde6e3af521ea8c055282df0" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "08d13002cda012293ab67583f1415746" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7449193b3bd54061b6fb09e5394a6284" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "678d6ccbe7e3bc69a293a4f651a0ac75" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "81c2f63c42a018b0530501ba1abaf19e" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "04388e4eef47f514e9503b5a3075d105" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "1f7b44a2b8a52eed70e52111b3e46fa1" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "327a4dfeb2be613c18cae424cefc19a3" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6776a285cc8f85620ffffe6141704f24" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "be3255b0dcd3c221c0baa68314c82901" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2ec9b8258f7e605b2ba26aee048c1bb1" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8d9ac1e9836e2d2825d19e91658a68b3" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "5af33b9a603909ccab1a686faf56d69e" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "a3212f6eb3fc29a9ab83fafe3ffc70ab" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "699121240240ee3151123aaac711b8bd" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0ffaba343de83613f4cc639b623052f8" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "71a4acc714067d54779390b7f9b6e0fc" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "7388211b8f3124c579d4e16a5432452e" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "9199e8e344921ef0a21797f1647be46b" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "483d73f3f9889fe85192f9254d17d03f" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "41b0c5f6ae4e346c7caee269123d8554" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1530be04f7b2f9dc30394f8373dc662c" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e7eb9a480dcdd4d703e57301f3a215ce" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e8904673b9ada42bf064a199b28f7e85" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e7d2e735b68c5e5b5c90b8b063bdd5af" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "30cb636a54838708f4e543ff9566ede9" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "79f6e223f9232628e9037c715b750f55" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "52731f333c5d5d02e959ac52e23c2d14" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "2da9d698d90bef75a2bda01d7eec9299" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "55e84f5fb9f9710a233408c6a6f43823" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "76e7c793885dbc629f741e1d5b231a06" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "370f1edc4892ede1ca6e44bbaa8ce49d" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2de3c683863ff48fbd544ad6fb2e90df" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1b9325e491ab80d203a9cf674dd3117c" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "0f6ac51feac0df0a0022510a0af27b13" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "fc287ec91eefa1cbc72ecad4e7dfe6ae" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2295ce6d740fbf4d27ac31849b15f12a" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "194308be10ace95beca72b85a4a56f6b" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "141bcc00a8b20d5adac94b2f16c87ca5" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5a75ea7bb8ac09f4290565ccebd531f7" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d9f2d729b1e70a49054f7ebba620f67e" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "a17cc6aa4888cc088d83799e4d94f76a" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "9c954352a0934ae3f3caec6d4ec953c5" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "edf3b8f0c4238fbb962e83a5fb7f2457" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "57438b6fbd0570ff3254d21e4713153e" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1d90e4baa1eebb523b524f72b409bbec" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "362087bc4fc9a7639ba517b2f4027227" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.64.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.64.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "eb3d9e57bb0e6436ab6b7708bded8983" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "98808c152731797a6f01ff1f1bf3d37c" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "6ded04176a0f50b30bef64a586f27a28" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "79e5f39b2df47f4234bd0911ce179a5e" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "97387a4b76bfce460655a7ed1cc6d388" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5d2caeffdf3b94c70ac6fa5e32fa760e" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.65.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.65.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "671f4500fa23d0bc1f23f11e88845864" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "bac07ec715541cb6938f494f2a24c265" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "a387eeaa4a72eb7c3a17123c415dab6b" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "37e8fb300bdca19daa2b5d1684cd7631" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f88cb0a665f22d26ff25dd17ada46bcc" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1a71e5c5b123a05e54d38a0719f946c3" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.66.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.66.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "48f2e7132e4334c46a2764b89607b3a3" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d0844a8b4b2ab6c0d5452b98444c94d0" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ddf0e2b619bdca52318fb0ccc8f554fb" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d6ebb460c3564c0879295f27a9af3a42" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5dd67999912311f2da248f54f0b0b9f8" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "70c4ae249dfd8fee7ef69041e4bb38d1" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "452ac2ffc85e37d11e730b71d2d8c4e8" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.67.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.67.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.68.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.68.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "26cac7d95290bc1bf22180a50fbacb4b" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "6caebd9d0235e53f929736291555aa7a" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "8a7de1e38ab9e8373d9a235a6197f3fc" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "92ce9e257c52b6596b272e0311a01c7c" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "97826000b1458502b8f1a0f49f62c1a5" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "e953be830ad536586eaa73061ba9a610" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "801f7419e2222a38b663dc9e2f6aa37d" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "c1a872eb05a9baf17aa7c007eeb857be" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3bf4050c80dca0962aea3517192ad206" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "31d5d1e1a75549a5ccb2d3338f8e1723" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "cdc500774bd1cb9533bfb55e7aee4604" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "631b1d31b8e1ba9092290e348ff5884d" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "171d0c0da5f65b76aa8968ef0173c569" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2a60cfe431b8d7f6174aa40f2d48d15d" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.69.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.69.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.70.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.70.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "984018245729bdba51885c634f80a67d" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "6bb2310aed9ffe3ed778902064b6f647" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "a7fb4bb7894f47193365e6dd52374824" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "09deef8cb9fc39c10217d3890431aee2" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "27a421259a4f8a76c005381c7566920e" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e05a83f05cb2050f52d6c67100a20fcd" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "31eea45b3b2d2bee4891f4830879cb95" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7d4ba9744a3363861475fee9541f3278" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d7ddec0fb55168d59262318e4db168a1" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "14ed14e94d3c9ab9f737785407038404" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b611ced059791f2f65aa66ee8a0bbd97" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "174d34e137821561dfc3048f7b60948d" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "f3e1f01ac3b52bdde70bc57321118040" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "560d34995d4c43399ae3f4dd7e7dbc46" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "61041154b340e05648d996fadcae75fa" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "75f81e2cf518fa6913821fef2290c352" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6555926b665db7a9be0707516744e8e3" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "49e750730070fe9f513b269c520e9035" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c42de563295f0d7ff745e4055733897d" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "697f1f7ae82a155ffe058e7111f6da4b" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "4d401270cbbc467b7365de5eafeb22cd" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "dd2d82c4954204d0ac0ca623d09fd1df" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "81bd6b55cf5d73ffc62cb6ed74a1d7c6" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9c27122cc90363f9401d3e75dc2c1bb8" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.71.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.71.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.72.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.72.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "3c0156ceb79ca13844761d5f2cc8c58b" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "7d403cae2a968274a3dc3ebefb245a93" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "bacea92b827b12d312f1a42ec56a659c" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "f3b8b83aa6022a363b6a8997cdb54a14" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ac77ddecc0febb300713f4ea30986478" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "956056d71b3a4409ca7b9b692d4c91c7" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "10fb416fe35bb9fafe16e13f364dfc41" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "123899287c679e7e9083e6c2ab7d8f41" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5b60c89d23da1059ff8683a9e6974ad7" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.73.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.73.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "900b89042df9624803408648efcf4fdc" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "b2a255e1df27b3a8505b758108e52152" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e9f908307d4a59d71277b719ef5f4fe4" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "da98ac359f0bea93da622350499d6966" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0c70b6ba7f4aedd24eafd82f1b4e13f1" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5f78329b6ca218eb534361a4d4921ba2" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.74.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.74.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "a27c6ae6095ff2dcb15f3d4e6982c4c6" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "1b27eff2ed9018e8d25ad3c3a6e6a48e" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d67adc2f3e78bfde4ab8602b3f7f6ee2" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a9c160aa999e19dfb7c385b66b9281df" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "717c7deb214ec5aae3e21994d616647a" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "91739db241f8c89706aa2702a70be150" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.75.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.75.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "7e37c7671629bb01452cc38bb50bb87b" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "f158831e110e06eeb575a5e1c11c3935" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e705011facd3c7b62954caf5cd928f8c" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "67d74d94bf0a7329ed5d3d1024f14eff" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c1c9a7826ed8f80469129d3257bf2052" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3a4365d0c8ff5c0b6b7609b107b15bd0" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f6841248f4ec4f13744330ee86c268d4" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.76.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.76.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.77.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.77.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "ec212f7a8f16288b60806b4a7680b47e" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1e51caaa1a5e2ea9ec4804d8e968d2f5" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "20fd2384fa52c5b9df8aa4c3a8d38ec7" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "1c535afe1a8f993b5ed5d8c47ceb6606" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "2acac4dd2dee6ac60eb0985346fb78d5" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "ba488290b90e769eb762715ca97ba5d2" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "824974a7c7f2a38b8386479cda071798" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "109bc5be9e3366231c739a49799c57d3" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3438cfac25f12b7b77b53c15ae403904" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3cca11ce38ae0ae052a6b16c919fa09d" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "565e2b296887f3bca93b82ec531a9e51" }, { "dataPath": "params_shard_480.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7966d4f349864cd095fb3640fe8d7691" }, { "dataPath": "params_shard_481.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.78.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.78.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.79.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.79.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "dfdc977734c4f9357824be9ef0ade4d4" }, { "dataPath": "params_shard_482.bin", "format": "raw-shard", "nbytes": 4194304, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 } ], "md5sum": "aabafd87455c1af1d7af6ef508441f02" } ] }