{ "metadata": { "ParamSize": 885, "ParamBytes": 40900313088.0, "BitsPerParam": 4.157646319274502 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 622854144, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 622854144, "byteOffset": 0 } ], "md5sum": "dee58ccb3e2c1e57a8962118b1991c18" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 77856768, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 77856768, "byteOffset": 0 } ], "md5sum": "1d3324903b646695b0a4b4144c09b3ba" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d682fdbc75cf3684075bbac89cef527b" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "c49369f5cb58cb07250e4927d6a99835" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "30e42aa3269d00f267476eb7a145f63e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 622854144, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 622854144, "byteOffset": 0 } ], "md5sum": "a305387fc0dcd8e92aeab7c204b7a973" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 77856768, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 77856768, "byteOffset": 0 } ], "md5sum": "e87b87426ff72031101a865bf51dd559" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "19de18719e4f04bc324a51ba2ec9ce81" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "c87166a568917cd272477d963c4b28c7" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "51bbf9fa88ee6d2f73db0531c6e9a932" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5a4f1086d883039a08f8b46df7dc7d5c" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "758faa47ec0794e472e660cf8bb7b3ea" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 24645632, "records": [ { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15155200 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15171584 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15187968 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15208448 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20451328 } ], "md5sum": "71b85cb71dafd73d146ff0d20edc216d" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "2b92c431bc9f117779f1e00b843e7598" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ddff8b1b7441a1875232a69a55c51c5f" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "750201db3fafe9291f0b3b6906885533" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6662c9cb7afc02c985095eca56a3815d" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 30330880, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 15155200 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30294016 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 30310400 } ], "md5sum": "4d3897684e0969001345d83ce0ccf6aa" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c501c62fdd000fdbd73b613fe96c8202" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0d51b3a87f8fdcfad1f08cf2dabfa1c2" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ff8cc677b7a3782fe53a42679d798f9d" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "1bb4ad2bdea880b952307c9bfcd3597e" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "aafd91989776c302b42dfb3994e0f320" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4e84f05425f6eccaf8659395ef1dc45b" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29872128, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 9453568 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24592384 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24608768 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24629248 } ], "md5sum": "434013ea79ee59f7ca713574cd7a4c98" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "76b25171b3420a06fcec3804ba796728" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "77d2d5d89475eb32eb948510f6c751e0" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "2e35c7e86073f503a7320004ea0f35af" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f0145a12a644d6b607e0f2b497a3ace1" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c0b14656c2137a6a51a4b52d155900fb" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "87d229e4db2d6e990e7dc79beceab3e2" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 28856320, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4210688 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4227072 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19365888 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 19382272 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 19402752 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24645632 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28839936 } ], "md5sum": "f843820d6b960bafa2bd914606a52fc4" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "7ed1d6b66b5db89486d9eeca802acdf7" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "9f2753a0c45f673d4faf21c836decb70" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "65bf0bd9cb5e8b8740a3b649bbf5fc3a" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "33fe076f8b9140daa1c45f9ce486fb26" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "9fced072457d5e7da02f445c188b4aad" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "2e3ea0a9b93351f701bc22d4051b86d7" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d81cc5ac91b223bee3d983eda1ed090d" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "31cfcb61f0b24d503b3a960df0866aa2" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0c95ae8ba2fbeb57c3c4c0f0be1e24d7" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "54573f2a6ebb81316f92bb0d05ea44fb" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 30314496, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 15138816 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30277632 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 30294016 } ], "md5sum": "3b96190e77480c1efc513a1a06b5470c" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cde33c53547181b43cb3c2fb424a4051" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "a0da4e35f29de13e85bcf071be645e36" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "4d429ae7fd2999b0fe7852d694d4375d" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "369ede040fa045b35bf5f421ad69ea0a" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "adbf6f4cd71237f169ddf9a5ce90c3ed" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ca7f2caafce57bc2aea13bbb1d46535b" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 29872128, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9437184 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 9453568 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24592384 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24608768 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24629248 } ], "md5sum": "7c4a6aae312e80fa450ad79e1667acb0" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c660e0fad669485e8a8ace59f60299b9" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3ee79252c12a5048e581a7b5ea9b7e3f" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1c67d09f9ce96719b742c224ed3ca2c9" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "acc9139e34cc955c3b5605e44d73efd5" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "cfe008d43a5423c8e3ca4aa401fe2117" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "b4aa5493b02cf0cae76374c888d8c90d" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 28839936, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4194304 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4210688 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4227072 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 4247552 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 9490432 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 13684736 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28823552 } ], "md5sum": "bc2cc591dc3815b0229d3d75e346f1fb" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "fcdb1a364756964723ecf3ca11ad730d" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "c5be5c583b3b248daf107f3211ad4d55" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0802a24eb0f499fea0f8622a617fa9ec" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e7c790c1b6effb0f00d23d8397edead1" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "da86878017e64f0854269eaecd54d498" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "6b71ecb32c4a7f22712f2489bb0f36b3" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dc37582035ba3b565773f9447e92ede2" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cecf45e0f82865edd608817f126b10ae" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "434114f5d0b2c58394536d4be8439862" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "ee0aa9cdb3ae6d05f56e7ce02d76da14" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5dc596d76b808421e4e8c1fcca0b0c41" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "7cd56f378f4baf97137ddb01b4f2e136" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "21f3748b293cb74bd89979ae21543ce8" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "1df22b002dede3a7a8e117ff71e7a812" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9484f102182082bcf688d138a8bbe47b" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9c30c94d0aeaa814d00f2cbc304bcd76" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "4318ca6331ea1e144fe9f4c9ce39aa7e" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "490daea7c1013522e930eae5aaa8a76e" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cfd276aee66eeceb71ab13dbe73d60f2" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7980286e554cf11febd66fddcf83b1ca" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "f54147719d918a0e8f813fe3a76bc3e8" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "fd6b9d9af8c073748c617b010376ea7c" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "79bc1b019ad70b34ae9be2bcce1d7af2" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "2bcf95a0c9356215b0e94efadc03b34e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "602a9e5173f90919613d5c7504bfb036" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "16ff137f3c6943819761c633dbdbf2f8" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bd009ac964afd9c8e70b91e87554460b" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4e886c2008a4dc70c00e400c77a1e7c9" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5c06bc191a06cbd0440d0dd1c5fd76c8" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "cb57f09981be9d8d7bbd31a7fe2fb54c" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "455094ad1cb045ed55d46b908adf0914" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "4c831e2bca70b08220997b1eea62ac04" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3b64af5521014f22ff3de8002e1fc49f" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "013ac5bb8307efb4bde0a9629f193545" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "a5de6532efc51cf1fdb9a4b3e1d8df67" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "0b27502e45b0bf805b6c7f2e2a0a37de" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e4a3d56073d5122dc3a1de3c31652703" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "6d2b752b6ec64b7d3dc80637f00e0ef4" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "48d4174aabe796f912d79db6f14d0f10" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "523535818da3fbb81a987e2caf36cee6" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "b653cb24181f7a284befca43c05f51ea" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "cf1a58ab8a99a9d86c822187f40b48f9" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "3b4438eea3f069b7ad45a69651a14ace" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "82ac6dbdfb39f1923ca4d6ef2d9ebdd2" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f5c89d96469d4391c9ab7552e290eb45" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "16443b5f921e5ce45bb20fc77c42f9b5" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ae72733f2877ba8dd494fd99dfdb1660" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5480eb5cbdc313ce1ae0c1e731bae076" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "9556f12cba54cd8346cbc54bdd5e1300" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "c0f41d1a893b9393040dc089f5492a30" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "29e0fe4b7368967ba9299c99952310fa" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "71b4d4ba412c66a8ce9477b3b29baad0" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "90737fde0f951697610cab046cd6d1ca" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "8a2f6395af81e57c77591dfc04492756" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "1ad346fa98ea65972966ea09cf0e3fd4" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "9b4119e7f067565db6b3b7b622c31a71" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5ec4ef90496cfa745ff7b1fa4778a2db" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "51470e8d1467236663306217b9a5b7f0" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "98eba3c790211063e63b90a2e7830838" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "dacd02ad30bc41544cbcc38a0fead9d4" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "aaa3784feccd32c46a1dfcb5bee35d19" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d993bc53d9da5230bb7cc9a2894096c2" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "05ae15ba00e638364ff40d5f82b6554e" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "e34aaf52552c237c8c8bd01b496450c6" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "9059bf38bcf26e03d016554c1f277681" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "7f112b97722a8d81d64d25932c605588" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "7bdb1f88bbf49f0d32554138812949f2" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "84303c0a58f484f2f57d766a2c3149f3" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ca636426c1e1fe351cff078f5a58fdbb" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d3dbbb25a6f1fff5e0340935307d581c" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "f4165e1e6c4a05eb37005ebc9cf9495f" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "04a3551e6da79d13957054004a8a07de" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fd16cf43f94ec685c3193e8cf52957ea" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a5897e448cfc8bc06cf52294de985913" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "b9fa14fc365dea0d28843448ff8e22b1" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "6f6fb7535fbce752c9543eebd3bffb3d" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "e1f7d6a65fbaa6398c516e1fc25d477d" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "7444bb9e6190aba86c1bf145436a4a59" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "fcc66442360f311a0d44dd992783fb95" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "1fc96993232d55967f895d2ef223c649" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "761f08b4115b8f69fdd49b7ff402656e" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "874348f15228c71fa63e9663d5502234" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3211a294e5502a3c9b877da80631bc3c" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "db496285d1111b0a6617864e1b6e1576" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "f9c1e1c1a6051631908bc82ab27b4eae" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "f976654e0e50989ffa5bbb0e7a0920a4" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cc11f1c657fb3dd36b7bb8cb9404029c" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "96fc11a15044bac9741d963e7b35bbef" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "75b4fe9e929d4bfe207fecafc286e3d8" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "d48fe92ec8e863706416dcfd5a638959" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "04466f0f6a935baced186645ac412229" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "47e50881d5fcf08f557302a3815cd5a1" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "38eb18ff3abeb7a72018b6bd14694734" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "20e8268e650c48f979b58647c7c0b938" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "c3199d020e60c898acf22ad2a2e3d54a" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "af67ab337721a10a21a2cc7f1c2e5825" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "50a488fe94b5baf812b127a3211983b2" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8f3ec334004477a884563463504e19ed" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a343a00a9e9cfd40eda6c88e537da8f9" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3725c056defea0f3ca9a009ca56fbf06" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6f8d8ab2f7503299f000af9be8cf78f9" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3c2d786b2400c1c797b8cc5b7efb45ad" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "d986cefbebb45a1941e05525b302f06c" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "4b20c6fa509017b2c5a33585c0af967f" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "2c64586913156fe550cb5f595ac0231c" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "5f724f8471d1ad8f418fec7f7f864023" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c3c4b42937f22213a99f01ecb38aeb7c" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "91ddcdbbd9abd5bf94610b5a654d20c4" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "698861d00cf3db12d6ec56d3630bc5b2" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 28823552, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 19349504 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 19369984 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24612864 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 28807168 } ], "md5sum": "680e02bf840e3a965ed44e2e4984653b" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ab50389f2699e6abf6b6b9d4370975d8" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "446184cfd23cbb784eb0a51d30ad49c8" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "39e0fca8e8a70f07d5d9293f789cba58" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "41b2b0861bc3e27489bc34c9b7e722bd" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ddf83d73fa8f56223aa99954d49972a6" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0fd923e104402445becef26971fe1bb0" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "83f15d0fc1b4115730e79a74c6f334af" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "a52469eb9bf71eb1d05db429554c840c" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "25ccc04d8e0da92a737479f0658e995b" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "37d0ecc7a7c66ae1ce3f9f39ae5ab8cf" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "8926370a8fcc7a0a1cf07cd61a56051e" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "ad1f046fdeda702f56c3f50db8cb1c6d" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "776647842bbfa8731194f635a45be915" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "3c3ff015ac483f9ac4e79a0da56fc060" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5d044ade0654a39b9fdb53d33bfd2cdf" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "06869ef6a251c92bd757809c12444673" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "11a31711f7ccb3d5bf93d432ee30ba4b" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "fb267709df1e1621922a3bb424d7728c" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9c0a0af8c7c585e1056d5c127fa62c6c" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4d53c69df81cb43219dfe31e33473d07" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "afa572fc2613e5f4b8976e70dd17c109" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3412c38e00f02afb44aba37c9270d6a0" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "0107ff02aaedb5a923faccb74233923b" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "4a4afab5198eb5225341042ce455984a" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "40f20911a23067bab4bd276a85cbbb3c" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "5d9f042b71a5bf740223ec999b02ef66" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "af28550b62a3cc055cebcfd686848afa" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8357b923654ed96fe2a5960f62e550ad" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "512b4505f80c408fb0cfe961a841a61c" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "aef0098e16a7ffc47a2c2b5795d49250" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4c7f0227dc4550618d59523fe53a3e6c" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b14b8124469f89fdf31ea1a12b49c602" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "c2c675ebbf7e9cecd235ff55db35278a" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "a0aab916e0ca2d7332d81f712ed43f48" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "175b9387f96cada1ec74e997bf546b1e" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "53f4f3d6fefd784a9d3b089b491d1b83" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "994ff970b88276a805c993b81f5e3ab6" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "c498ccbf40f5d613af59f4d4a51e0257" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bf990df3232bbddec6da4d44dff4b526" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5c33a003fd294caa3fc3b14e0856acc1" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "c97b750ade16d263703764dd950f9a84" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "67ebb32d7770a04863e13727a21fad4e" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "c1ddb68a41410b802a34d26961e5564f" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "70a69d67960fb3596fa5ee2c71e4b55e" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e0a6fa78acc4ff6b9f13abea0558491e" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "936e58b468deeb02079d43dfa8d62bbb" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3dc8e6736ba11aeaed6551ccd4405710" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "18a775eeed81b79994aa149481599468" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "f155f3f182b4966462b8abb2fbbce8cb" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8012711fbec80f149e93e97fadd97ea1" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a33b055dd3e0fcfaf3f7dac26d4e07d3" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "59e37131b4b1ea63677cf7dc71cf6011" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "35036a5bb47bdcfd51fa0737b8886da2" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "78513decde39c65a52c9a1bfaefc3a2d" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "3f3f22f96f6652986b67bff51132ab88" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "a4ca8c70c8fc1c39adb8f4c745eb4f91" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0f7477fc94ac81d94afad9d9650648f7" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "07d1327c99b08af5b00275994635a248" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a838439238c4defb1ba6cff6d1a51e0f" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0b99b05c2d6f1197cadd4e17afbb05e6" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "07c224e6989224a60aec146c8f9c4999" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "bbb870490506492d90d8a1f430f3bdcd" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "441022cb5394f8d0f6d93c8ac5a7399f" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "44ce01d4a42740a2c33120578a014afb" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "fb174561c14c72302e75941140d440ec" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "cd6a7bec332192fff643d9e1175c8dc9" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "1da707b18b4233639b3d466f478ed636" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "cd9ca10bc106250045a66c27250e0138" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "586f08c5444f4b7d2bd54ac7631835c5" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "71e8158ca5d8c4adb86d6778df782442" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "637a3595aba67fff3bad79042d828d46" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d086210385f47ab8837d89a1ed8b4bca" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a3bf6765bf6f7a84e2f82aad652873f9" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b51e768ef7c47ec10f4db4a0a4cc5782" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "258500f4c6b84d41fa1f6fc3c65139d5" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "4a93b0e5dd724d1d61db265c96de3eb9" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d8bc7b54d9117088c6fc632ba58683c6" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "bb175caf5d7c2ad12892511f24a36da0" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6e599d662d042ce66cf5a3c2e7bfd5ec" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "2d46336df5390cbcaa18e7bcfaebb0e4" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2634efd2f993fee03d196cd5f0ae41cd" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9c44bf0088850d82311c03a9bf00bdc5" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "806b14a0bfa2dbf6a391bd82ade37296" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "8fc13a1ac0326c26641066de7e1310cd" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "02f1b907a5ec0c12fc535d9bbfedff5a" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7f55d92af565db82970716a500e5e99b" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "d9a8d2b4edbbc790f7eb5cbc48fdb5e9" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "e122cde53c304fefb2bdf3bfc9d7c8a6" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5588421da3b564b1ecbd864cdc17b22e" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "aec28402e1f2beb1e9de8aa5e46b59bf" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "3c4bfced1d670ac7bd81e14def4f5e53" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "07ae712531c3813cb98356bba1fba520" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5d41910aa55446dd3f42e7fe1917b2a8" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "94ac29b75adc40e2658c210fc758accd" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "fa7cc5c2085f80f587a095bcb8305670" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "ba33afa7f1e6132c905cf625fd1c6afa" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "5c51633971cedfe7fb78c4bd868cdee1" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "9b131ce561ff59624d4edea4f3651af7" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7b1c58c788fa25827152dde8d18f5ad6" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4b51bebdd69acfa7bb3d3cf910424811" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3ead87bd7e598573e6e3c5a59e4fbef2" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "c15ba9d11f6af14c9d3e64bcf80100e5" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "0ec9e6ceb727304188ed179fff1b5bfb" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "4ed5fa92729e9b9a7e7d07b9f8a3ea72" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bbdd0b69882e66b93cdb01f2767ebcde" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "38d9ab0547d0b324c8ffb6dd4157de70" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d595cf998edde1b2614c7d121fc77123" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "dc6e4d833acbeb0dabc6efb27951de84" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "c893a1da9437fba8a054ff7349749f8e" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "9a2a8347d856ad928249685cf4fc26c1" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9e291270c8762bfe8ac367f0c76386f5" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "32b9c4841d2c945682ba2f78ba5916e5" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9c0d2b445053b087f9c3b98be078392c" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fb7ec4c227e3614b09e32c6130e19548" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "b2f53eba20e4df438f1762cce5744178" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "021aca9a1a207481e64ad44dd8460b1c" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "7ac62158ed1ed8f5c9cf75087f33253a" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d5f6c88dce4144c6e1c0ab192b1147d6" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "3f707580baf19910e7601cfa823d65cb" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "4c5a01e5db0741b4f16785f8365c7c37" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "f6dd91eabd7c67606c967c9152404362" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ae7d526125de5c09d1b93e1b5d226428" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b1e28bddd2d7b479f3c693b87ffe4ba7" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6748b89b922ad1cfd9b667128328341d" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "4cbcad917b4155415c47dcc547160729" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "40c9f9c146f919de808238fbb4e53620" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f2e879056bb28e5fb9b693c9d809e422" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "37f88776b3fdadd569558b60cefb3c9a" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "cf431ac2aa501fef776d5a85a2fa984a" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1f4660f40d222f789a833e6a097a33e5" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "360095da5fa7b476c10956c5efe59398" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "98f9a845c979372a91598964fd9db13a" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "51f3bc2714b1fe2d9d7deb5afbe90362" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "3bf760f5884024dc96863353a4f90d27" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "47f8a7c09c7021f01880ab42098151bb" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "24c432a9156b42d3375c129870996ce3" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "570ebcd1f342d077d8e48f0b236887be" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "788fef1a05562e039b33e6652e0e48f4" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "2a5b94b26d5ef6cf33d9be83eca35f1c" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "beeea6c905f8bd3d1ea181bb7c9dd556" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "132e0bfdaa95cc442a16f5d96386abbc" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e65fe7ae7692d774fd03284aa8e19d2c" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "4d16c75c4006c91f1cd4284338a06a0c" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "d4b0c12bff07471c152ef5d80d2f6768" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "961054977982428e32689a08e1dd5b27" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "1c6f8a2d346b4941583642473031ec85" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "b6f1183e92f8b6208795646a65eff316" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d2d017cc393496e5e1cebbaf1e1b0f7a" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9411e47f95222c63bdfcbaba9a9429f8" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "52f7d306282d778245cff7fe175116ac" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "a5ac10d6faa01cf36ec7427f20584d91" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "0ac51e5431543499bff69f0a1253f081" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6668c595ac51ae774b98fbdd8703ab78" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "49dd3bdbdcbe02e3697c5bc063088e8e" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "2e2afe845849eb41c07d01c0f2af195d" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "31b9fd6a29e1e1b293eaf1de7de1c069" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "28d3d30742f5f3129a5499a684d02c90" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "606200730572e095b346a4d9cd3bcc2d" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "86a1c71bf30b99ed03644fdbf9785124" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "c07639cfb5977fa2a92c5fd539d16786" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c09c51350856ce689aba36024b8f2d11" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9a2927b4a731e1d12258a70eddef40fe" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "cb2414d79f7bd4d443cc45d6b0cf2915" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "ab53b88903e622ebe2a4062aa4d17fbf" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "913ab2ed5c9eae37badde27ff64b10e4" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "4ad263bf876bc02965da89062c6e3ae8" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "782c35772bb4362346d51550455e03a6" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c3f036243d0769e693a18fc3f76be9e7" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "28925ccb6fcdcb14c8d0c4969bd813f9" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "b5411962584eb7b25d1f02662f2995bb" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6a18a4b65e26555bc76e82c310d25ff9" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "89e15bf4e2a2799372a165e7d651bf9e" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e7b5e42fb0c69f03e330e67aabf793b2" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5304a2374949d834a8549dc6bae80859" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "ca81bc147896991532828bef77241907" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "cf798c80a848fea8eed4f9f6a2b2e2b8" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d670a8acf9ff6db7ff5be026cf5d383e" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "43f6cefa574e748d79ee733bc770e85a" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7ba44d3c82c321a059caeddc33dded9b" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4435ce780eb9a53220fbf47d3d8fa125" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ed3611906328c1d4c0f112bb006126aa" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0cc498ae2121d9d6e24fb3775e8618c0" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "5dd94e0c19655ce83ad596df65a2a6e0" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "db52025098ee2616517af09c7457f311" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "ac729f6f15b12c975204e78740bef39e" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "b2cd769cfd3ab0326fe0f5371c233714" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "aa718444b3fc37c54c608e1de9dd0166" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "051f2e1745ce31b0576141dac3c9b4a4" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "663ffd9040387b0b40caf721035ba73f" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "f23117f778835120ae091ff451b6edfe" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1c998310118d9db67ef849112bc4f1a3" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6975941bd5cbeba546be47461c6b63c3" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "9869d02061c02169fbb8881dda22cd42" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "5234319a7d1d071d84662cfca032fecc" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "610c8f6f0acaf25f7ba240ee04edb14e" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7562fe8843af1261215e44a188a12f34" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "63ba448579b6536c28b0114fed434a9f" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "fd7f98adcf6591fbb77d8e378fc4e189" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "dc84bad8bc16374aa5ee88722feb9a81" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "eea4c9b1a6a956b885cf718444c5d7c2" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "4cb622f891e50a96db90c19e7a700694" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "96db061d22ec68cba6a52d29d16c7e1d" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6b065069693e508d7c089fc94929a32c" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1e4f5281cd284b1168bc12cf648de12c" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "d6cb331b1f309c15d90386f3d98ce40a" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "27098f9631d52f9c96c902f0e6201ff0" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d6a0b577b2d29341e1e1f61e96700b7d" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ce6907b16d04ed06d928226371cda5bb" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "2203476d822d59e36a26f2b566d6d7c6" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "60c4ac049e237c9f16d30d97f9477643" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "1612138d5f86b89809104f59ee29b097" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "25b5b2937ccaeafadd7c5766397f88cb" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "57c7ba01286723fc99f24eb5a4018480" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "3c2ab0e2bb588a24ae58a570994c392d" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d154fe0be12bf7b68b353b5bc3d065c8" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "34f86d24f2e3e668c01bfe28d031d1d3" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "fb8b567efe173a3c9f7f3b3bb5be7863" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.64.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.64.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "eb22ebb727028556e6f43ee47bf74487" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "747b06cf5da96b622ef45df90fa9ca76" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "afb9b091fa0ce7b4fec84db03c79b7a7" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "31dfa8bc90e187bda850a841c2b9378b" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2752be35dc928f8b6504fd8db8c7930d" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "05e275d5560d6e9a85c457260361314f" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.65.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.65.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "e1d3baf0fc69e03f617ac06eff27e6d9" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e6b0c15f9dc06c8b1d0d19a784b2d555" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d5e5344c4500e5a097f2ffdad68384e0" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "07da68045db22cbe913f1e40dbaeef1e" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a98697d76f3ced8ce1b726220bbb02d4" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "00d5d8423c68e81730920399b199b35e" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.66.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.66.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "4dabe1f4ff8d833195c0d93d82f22a56" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e5b4d1a7d447e458afa323d80e855ef7" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "f39c4816edc105da66f9ce40b22b11e6" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b03e71d516697d547ca52855b2d5f110" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b164f569b95385cad2fe2d9478d42f0f" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1191252db0e2eac001651d3e2d97d161" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "343cba04e9574122833d1308de8d4e4e" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.67.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.67.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.68.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.68.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "884e600079104201ecc922af2dd31c94" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "7462709b66b07d9f482aea89730ff06f" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e5f195b6e5b53f70a074d118e2b9cdd4" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "fe4c5aef67b1dced9ee15f5ef39a90e3" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "5eda22690fdcdf61063d1f6de5b4bbb9" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "5b2be0dd9ac73d22c297c97dc0c19720" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "e0c536c3b224bdcc9120f8f8e98557b9" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d1b0fdd1677cdb2b473e89547e0c9f71" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9a375cd1c3b92aca545508b8b7be4361" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "785407b69cd0f98e037cc4ae81050f04" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "c7bac8e89ea60ec874d641bcba752053" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "e4ab18059b9163c75056442c3482b223" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "80f4654b15fc15f3229ed7335bb5c71e" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1195b580a43b409b31d34a9b6543f43b" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.69.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.69.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.70.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.70.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "9366d91d0cc2b378d9e40b2c53471783" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "f077c892f8efe1bf43513ec1956bb5e5" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "77f2cf0abf0688e87fef040c04b29d30" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "d11ce9560b6b890b4fb9a8222e32ca89" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "2989adf86b731c5065b3fdf2eac61d12" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "4e7347252846aa3b216908e3446c44ba" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ab6276acf08135bb40515a643f5b95b5" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "757b686e71894261fd549a31d3d578e0" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "7fbc2caac081cb52d10bd5f4ff1e8b78" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "cbed65e85bf6fb9ec589db7ecc59064d" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f9eb992cac8a921b75563c8134416a70" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "13848abeda175a7fd4073cbdc024800d" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "b374f77c4ec51a880c9865726fab45cd" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "a132f08b7bfcadb0a69384f7c27c02c1" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "a67c24078e1fb9dc1575db91843201af" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "968b16e6e279edc8ac0cf11fddb74d9b" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "927783e6dd334deedc22d1453477a361" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "4b84e9b3c1366059a2ccaf3df9ecaef1" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "097fd4b96bb1b23919f7469373e7ee7b" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "35b9fb01a408b15cebe7e74cf0a570b2" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "3880f22792efef82415139ba65c03744" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d431c24a80090937acc8fb33e76fc331" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "03720ec7c110117b1ffa989ff1c5365a" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cb891c93b912e9d7b1dbb9ff3a4cefa5" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.71.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.71.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.72.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.72.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "d44acc9782af5e4767c4dec995545064" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "195c005b8750f4e76f9fb4d089b62770" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "867a69167db5b6a101d20c5ec8f5f1f2" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "bf94b4f249f12fc2bb250535b6bdc734" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "53236d8132c6877e310c1cbfc54e9e0a" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "81a118398727d44cbdb0b5bd8b93c35e" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4e5d99deabeb86404a84b6ae8995776d" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d39f0e9f4dba2e178886c3706a926678" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "85973e0ceb1fe18dea59e1f0fd8e69a0" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.73.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.73.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "9b1f3d3a4026cd05e46de6cd121a77e0" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "850d3ccb5f742240ce3ecbd2e608c289" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d635c858702e4e8bdb95bcd568d10d02" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2e510de2659f09addff82259cadb87fd" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2565c38e516536d335803fc161aa5d5c" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "b72b0bbfbdc0a21d1cda0708a100bd3a" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.74.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.74.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "a8a285138973080a7253de91f70c0c71" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "78c7f9e0e5b7a3c0f5c9e7b3c7239b0d" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "87733ae3121439b7710150d2f466c5ee" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "83fcbda05b4473bece0e6e080cecc34a" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "79cc8267c77cca09bfa0c469b23f6962" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "f27bbc6e5f588a1127cccd5c79eb22d7" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 24629248, "records": [ { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.75.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.75.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 } ], "md5sum": "b3d7c684c87a25c746147fdf443697ce" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "455bde997e971447f0fff8b7f57df301" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "d2915ed9f7603e1cfe247bf8bef944c5" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e11abe6be8bcf26069a3414510fb72ef" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7fe9344f3a6f91bea060639c9bf04b09" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4d4eb9dd684b762e995bd33773f66206" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a4cdc907305d0a094eca7e68e728c2c1" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.76.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.76.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.77.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.77.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "56aac37bb24db17814f52c779979345d" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "cc94f0658e34274725cb7b613fc88b26" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "9a8988a59c31bb48dee7229b3a6123f8" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "ea73a45c8c10ee1d7fccd22678183de7" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 3696 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "7cb1de2869e88927bdf7fb6716a06b84" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 19349504, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 4194304 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19333120 } ], "md5sum": "d53b55fd82d2c7253d1f4e42a4573838" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 242221056, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 59136, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 242221056, "byteOffset": 0 } ], "md5sum": "6435a6c985ef37b2c11b577cf3cbf31e" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 30277632, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 59136, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30277632, "byteOffset": 0 } ], "md5sum": "5713fffe955fb59a2150ecc9bc46a33a" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b866b6f3be5fa149bc3a868dfd14dcf8" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7b4ff6b1433e45d5a06ee20175b8c6aa" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.c_attn.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5233cfc1745a65f04dde306ddea32b96" }, { "dataPath": "params_shard_480.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "52698f67ddfadb528bc05412dd0becb3" }, { "dataPath": "params_shard_481.bin", "format": "raw-shard", "nbytes": 29908992, "records": [ { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 924 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.78.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 15155200 }, { "name": "model.layers.78.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 15175680 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 20418560 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24612864 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24629248 }, { "name": "model.layers.79.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 24645632 }, { "name": "model.layers.79.self_attn.c_attn.q_scale", "shape": [ 10240, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24666112 } ], "md5sum": "a39f3d632cac420042cb0f5265c2ea42" }, { "dataPath": "params_shard_482.bin", "format": "raw-shard", "nbytes": 4194304, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 } ], "md5sum": "3b37b287a9fac0715c8ee7416b9d25a7" } ] }