{ "metadata": { "ParamSize": 805, "ParamBytes": 31776318464.0, "BitsPerParam": 3.04023285660184 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 420679680, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 420679680, "byteOffset": 0 } ], "md5sum": "79cdbbd110de7ccddb1a2f1e20de7ac6" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 52584960, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52584960, "byteOffset": 0 } ], "md5sum": "3b30e9c6343f50a2014db07dfbe274f1" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d26515cacdc92d2c5705103ba2f0af30" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "9773e2cfcf4204b5b48f3dfa8bab249a" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 23560192, "records": [ { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 0 }, { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 16384 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23543808 } ], "md5sum": "97ea0e537a8735c691b5badd19b15ba2" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1a510fb8a0bc07b6002f97910f5290c0" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "dbb67e13e0e59a061f9e74374b8614ed" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "ebdd3bd97b105845fd518b88e93d7d47" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "848ab5b01e9213b9eb7078bac6d4cedd" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 420679680, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 420679680, "byteOffset": 0 } ], "md5sum": "8b8a02a0c14245a32993c53a4df286b0" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 52584960, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52584960, "byteOffset": 0 } ], "md5sum": "8c3f7f6a3dadd8a3303d0c97575306d9" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "7f8b25a32cc9aadfd2c87f0c0e59bccc" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "aab0ca339f72c50187d60563bbd7318d" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "c6ff248935c2a0444ae2dd7c271ebfee" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "19a47b56cded04eea6bd8c59333d173d" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "15f0738f80a4bb49439371f410c1e371" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "d11d3db0404829f9a95ad8626bee65a2" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "01f7adc271feac44da4aab28c152c913" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "5307321cb85f4723dd75cb15b56675d4" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "bfbe630a8d9fc274c65ea640e8c4b9ce" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 3358720 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26869760 } ], "md5sum": "fe37cf5e2fa3bb26ca8ee83a8614feeb" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "923e7983173e81cfa259d0847f14bb26" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "3c8b1dff4503f983f40f44a4a22b313e" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d4bd3675e51b116621a8a011f6d699d5" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4f0cef489166fa6ab9fb029a57378caa" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "df679a5b853ce3e321c6dfdafea0dc46" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5b2b8f8f31eab8bbfcf87df00b99044a" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15155200 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26902528 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26918912 } ], "md5sum": "3f3ecd05187d099ca4e20d8c7bedb41d" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d41f6c5783de34e365e2dbba56b25f3c" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7b18aab19993053ed9551324a6858906" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "e4c1068f01d183490d7372752ade7ef9" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fa22a7863061096dec277d4a89f91f1e" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "73b0b0a93dfa879ba774715b5d3fa7bd" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "b4fc491a0098db4977186c355f87de99" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "9fb7bcd6667ad0c5110c4cd693042f93" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "997a2bd68491cd10b9a10f260d697587" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "aef0cebb913d91bab8ce78f846d6a95d" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b706efd7e7d1c0106a9f40a209258739" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "e2d9b9ae1379efe2807060367cf2d747" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "1c6fada991f4f1f518851779aed84e85" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "ba0df813f4dccdf3e2c88f9410f59219" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5151cb5eeb5e05769a472ecdf9109a44" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "71cafe70bcda9498f5444b4edfdc83ea" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2548b46e686a4f0b50b6d42472179bdc" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "5986075182973cd3232582df3cf76ca6" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "7fd8f355a847d54bf02fec12ee46d199" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "44a70e1349748590627060af2a185617" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "6fdee311826ae7ea9b67118d944be5cc" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "e79bcac901b9a0ab7a7909847e5a9391" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "a40928f43a5d9a852b22c1989627a06a" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "35d829fabe37c05328d8a6df1abe1a56" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "cfe12b30e5fd3a89341759d82d42504c" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e0c1555ceb6344e3f371ba570863940a" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 3358720 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 7557120 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 10915840 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 10932224 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22679552 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "4bf6020ecd81823542a0a41ada8a1fd2" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "786f85c1ebd158f971491899d11e3bf8" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4d59a1fa2e202f27eac32df2a59a20c6" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5eecd03534f7741dad4ec3a432ae7d37" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "79234ad34b7a52ad5cec4c0328830c62" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f0dc38fd84d068ee5f38faf2f1ffcc5f" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "9d4134b5435016af5bccc23fe7a0cfce" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "99d390fba628dc64fa45f40d3d76d43b" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b81bd66c27da9f57acdab6be90416ebb" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "3a3bd09d413acc4116a7fd2b2259f323" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c81dd00c58969dc75fb8e1a86e43abf8" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "8b3440facaf3d7275b56508a22240207" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cf7b1f5d9e53b733eb8be487b494d451" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "6de0c2748db511fb4e5d7ce19139d3cb" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b3cffb60ce6ff69337cdfa4dc9c2906f" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "17ffc912a36a34f4bc1353d779ece73f" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "baaf3865795914ffd5e69929831a80f2" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "6404380796473807fbdc7152f526b579" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "d97355eac85d792dea55b266f19ff466" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "6d438b6a644d512c50643b7f4776088e" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "4877f8510bdb561cc117d03c65917dc7" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "8e40f3b558fef0f2a2669f875a336622" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1a868ac5b01039db87036e0c941e46b8" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "981686406ea2d09312a04d7f5b26ddc4" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "91947b8fb9be6be4cb215870f40eab6a" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "41d4ae8cb0059f0c7b2c700cfc0fd0a3" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "d74a13aca7df657acd576dce6f182852" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "997dc2bc2e129f5b0e797f740204dcc7" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1dc3060fe44eed68b54830e1b9731c43" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "769e6144f46d16eb604e721190eb26ee" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "fc366618075569034e2f59183ce8c30c" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "0e6a105cf15cb0c547535d6c9c0974b5" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "6324be8a130018d1cc8274f797c410ff" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f71fcf9cf7988bb6e99467b8de86e48b" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "c5b687a1a23261cfb435745e58a3cce4" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "86a9e3385e7fe2bb3c3819665c0c9d5e" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5b9802b08058faebc45b566504427d59" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "3db626a032756e48eef4e1115567a999" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "79e720c08f5b3466ff36197efe6a5964" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1a7212c6f08409c8e8393c5752ad11cb" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "753168b7b51367ae39138f040d4d2c41" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "6b633b0d948072e6e981e848bce11c81" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "29e7c5e36599ebd31a2b4a7d6f26be20" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "67d0b05827f9d687ee74ba79a0530b9d" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "8fbbdd744f45e2b746609d9e6bedfbeb" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "5e5c81ca424328cf012f0004be6a21eb" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "95ad040ded3917dd3861857830b6aaf8" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "fc82bcaebe01c980169c70c6ad2e93ac" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "ecd2a1101025a9faabdf334952eecf95" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "4bb819d2e13c00409adc28aab0b510d9" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "de377b3b356e83bef455863cfc28d398" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "5b0ba12dfe7c6894ae454c72e9efd669" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "eb00c8b84a63f636acc029ae0f9ecc13" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "cfa68462879c582c28a1c35cb0fc5834" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "1da3b051beb2fa538ba37c41dbcd36de" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1c28b8e1f0003f8809aeffcdfe818329" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "9374df83689bfd6cd91025dd7327f9ff" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fdd0f954cd577ad16d52db15f9632768" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "eb7e5a1857b1859cc5221ee564c8b4c5" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b14e804d334570f9a810767467513bee" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f6ce252694a0035e7812268dcdb56251" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "7b07e579db29edb4c9432f3f64079b19" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "74d666921c9593f2769325a632d1446e" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "7af4d699fbdabe63b945c5a559b65096" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "3a848d986e863b5a01b4c7b43ac3dd6a" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2ff831476fb962e74acbf0f54c4ebfaa" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "956ca780394e046c0433edb869c8687e" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1b3f3496d7e28475699dde1a3034613b" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "578188450a9ddc80bc1e2ff09e82f892" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1bad3541b871dcbe3c3ede23c4336914" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "72c81c7c1f0985931f8d3c2f07de4edb" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "381b683c931b09e2f8846b0976e8bd40" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "266b513ec0139c480bea67e39f01e70b" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1c24cd44dad061da0de4ae880ec8a496" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "12332dfcd1498294d9c5b17753f7c0a3" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "dddd0104cb9bb2e2d4b7527d6f55f8e9" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "24d37b4fd73b6bf9c1ab2d6ec4728185" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f8a70be853e802bc5b7ff32a661b793f" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "7c57807130e09128a04ead78b3168476" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "e8b7eb09744f044a6ff03b1de912bfca" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "e19902b521e51c43523c9bcdccefc4e1" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "eca339298e242d9411e63c4c0d22396d" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "3d05d15335c789c6f087a7a3d73a8ca6" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "773177380d1cbbde15bdc84cf6c7a0d9" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "dba6854f207f569e3deb3a20ece5b851" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "2e5a435b4e912e7991d9749fc0f70db0" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "3239e2a2931fbcdea6e619fdfa5360cb" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "6b0182b961835a355856c9a577c9c45f" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0e40fb395dcc15dce93255d86e99d02f" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "f2f6c1903fa7626050f7280bbd8d8f10" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "b27634489e2892a7e3b7c0a4a64ed9e1" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b92308fe810bc97bb10bb952961d5b65" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "05d45871215b82178905b99e511d4279" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "7394fe3e52da6535637c612696525520" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "23593635059e0b27f9cd9e17dd8e3de1" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "8722fe9c8c457bd3c50da5e7a8e3627a" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "80a787725d3c4568b2b1201954fd6497" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "660e86ef30ef07240dadd412b52888c7" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e7f7fc85e689538483fbf44285310f0b" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "8b73c19ec9e6581c0a659120bd8dc9a5" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "2824ffa1944266b10c64193f503716d2" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "700f06f2d3c634bc002f3847ab1cad9e" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "c0a4dbfbec7b0b3833bd6b94afc50f5e" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "6dfb68d323534b75711aafbe130dbbc9" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "eb0e80d58a6f9247718aa7844eb8fcb3" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b47f6c42aab0d41e6f152f92fa0fb885" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "0425ef56c1e56088ac47695ccdb929d5" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "df39c9ed423e9b398be041dbe1f581e9" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "0689cff3f2515e0beb8c7a998cba9f88" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "d9a520e6da4632266591fa10ca53d2a4" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "cdf5cc9e3de29942a53479940c624078" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "9e5049be621b44a564b696588b8f1bbc" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "dce424a56611414041ff8eb2554c18db" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "1d8cffd5e9333f5b8ea63e8df318b27c" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5082e15d5ebca77ac6c8a3ceac7f1459" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "76f108a41151ae894a945c37ed433147" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4f28d5c383f0566d8835ddb2d054779d" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "1a6f03334748e5468155dc3a504edef1" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "b1d1caf1ea71f19f5ca739f6d1bfae0b" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "52bad08f6390fcdc5b583dfa7ef5f814" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "0a6c8c0310138da641692a9a9f2396f3" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "26f8a1d6fe21e1743ad3938a37dc0361" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "6bce3983899c0e75eb7ff2b82d338cbf" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "a613331d96b231a919c7626564d5c40b" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "212388e5faf198623f4921a99411e5e0" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "37c1724473f31016f7244e1d34ce624b" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "7ae6499c9e97b04974e6e7e7f07239dd" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "03cd684d0338ad174fed71b5795960ce" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7dc7d4180b2a6e46abd6f2144d61b47b" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "992b7735c64764dc77166dda5402f8a5" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "0667b4bf645c637fb67ae0dbc0710312" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1a8ba97218364506c4cdc4ae6a24003e" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "d55bcfaa3cbebce3ca12c7db5e20b289" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "23cb8818f4f4a934a3d1e22b12b6eba0" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "23176f615c4470b687f168172a824885" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "c417cefb15a0945542ee93cef1f7e077" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d89bd353c3d5e1204643a3745ac28d9c" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "eab026b2599bfc077ac7fd1462315f38" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "772c3d081b37e10603459fb658356c7b" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e42b75252c07eb4b8ceb1e92e91da3ad" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "a75d22220dbd14a7c708429dce3825e0" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "06845578d2eb522e82e469e3e13a866b" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "75b11e529ed20747113f48c4dfe04224" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "7416f91d2e268eb6c38087877acb5e06" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "a5c217524578836b401ea9a62d54e352" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "87f7c7a427fa58cfa158d43bbffbdd84" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "3b7d93a987ab9cfbbfdd2e4f63f6d34e" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "6826ff395dcb1df85e0658724b28572d" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "44289e8724a6baec009f140dc03a65b2" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "a032cd17aeb344966c8c06dbbc9d713d" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5d2c757e48cf3d48ef4a7a39f099af06" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "9013c3b733ec902807b27ac2b9bcdc64" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "d42d2ea8309d0553169dbf47e9c85f75" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "9ac883c299207434d37b6c374eea1fd0" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "873227703e4c7196644f46cf1845ec35" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2e960fde7ffe35935b0631cc0b164554" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "4212e7fe78e6c13378569700ebfd42af" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "14ce2f873f1dad4d2bdc6e8a05b7f9da" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "66529bac39a8a71a5e93a681c788efdc" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "3a5c2599bdeb07000ad49a5e0bec73a2" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "6464db6bc51c16683a9c96eafa84ab98" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "aaea63a5a1255df7636b10feab234716" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "32663a6c921b7811a8a269ebecb5cabf" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "714adf75f3902ae936c3abebc55a416a" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "dec07b0616bb6324a4a3b385ed97bddf" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "bbaaca25326cc0ad46c8a2a23fa19a33" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e61380313bc0ed44660c1e9d90a210fb" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "be54f6c3db9c9922a00c9b84a3f77dd9" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "357a5bc9fc484f7ade2c5d8edc8bd41f" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e95d455951eab9c83c13ba605fbb91ec" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "5330466878c1df5b0f3ece706d6afed8" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "f3731c93189b55a8eb18e38f26950f95" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "d68d88ae0c26b0a7c141a7411d718207" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "93c151f244380e53a0b9b9fa4b767621" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "b951a4e41f8384fd77a68b5c5017ae31" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "03a24f6a5a75e29169d53d16a88d0237" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "74f0b3bffa2213a3ac54bdf9a589d23f" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "00dcd6dbc9858fe5eb40d7f8d9145ac1" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5406cc90256687c18a1803e13a8825ee" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "43861b977db9d4f190524bd3d61a4798" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e9bd5d96733c0293f653660d9e304a08" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19320832 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31068160 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "04c01ceac3dd1e509735028c38a815c6" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "36f9e57ba2e621cab9affecc4ac662e4" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "403a106757502ed0b46817d7be4009ef" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "a3c24076946e24f8d41c5463e228c519" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "8ef01a95baea0d736c33c133b5977a9b" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "cb8318bd3628201ad8744fea60010f3a" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "8dbaa4c8083a5563ea40f70d8b5c0357" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "07a487de9b5ac7990c8ed20477e6a707" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "b06e901d82c2604636e2e7e340614e04" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "eaf06ac70fa7e684155b82089beccce4" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "43577e92be4e147b3e15305511e2488c" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "e1ecb81d57765f5a1efc8e6e57f0009d" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "b11352ffa4a96e702a9948d2265dfb00" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "822b1e51850abef5071abbb3dfb316a0" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "6955b360ac850e4e85b0563a86d4d339" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4239c0db4b3af25c3b765d078b3761ea" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "05c80ac952b5b9214d2a0b0c13784e80" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 3358720 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 7557120 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 10915840 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 10932224 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22679552 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "7f713372a0d1409cbd310f6f00132778" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c6ba4ac0704c43bc324c84488fb5a944" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "b887d075cce225e5b35086510bf300e0" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "ac8973c20805e53484cbcc4ac27a08fb" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "41b20571304182fa5896b916a1ece39b" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2a6108a82618fc59a05cce71e7546a67" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "9836a7649000608001ce8bde6a05f4af" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "8022d7a1b5e04b865c881356d860c5f4" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "148befda1fd7e6403deeddc15034306a" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19320832 } ], "md5sum": "0bd4ff406851cfd7a9151cfe42697396" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "c03d55a203686871f6d7b60e97d57d02" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b110f9b45ae84ef616cd9a65d72cad4e" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "ed96fed3fa7a82e304ade31810008bdf" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "5a14b81aa75ff62981c30ba4780c4ee2" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "0a86fc2231bf62dbef2d3354a5ea4f59" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "229666f794314a9d177daac5bf0d4f4c" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e4012d294941ee616db4711830267a85" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 22745088, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3375104 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3391488 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3407872 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15155200 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15171584 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19369984 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22728704 } ], "md5sum": "2f65e805db5870fddb4a17dce9af474d" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e18a382f63e143ec496c8a449b31d457" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "c06282590d1cacf6d27de54d921c2ab9" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4fefb774af456d3713c6b0aee83e7d77" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0075fe6f9ce2fdc397eb9c89ef6f62cf" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7c0acd18bb631f09941f63770b69e061" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "47e88d5837c58daa37c10e2ba0af5c88" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "ef4a1feb7423fd577096609050959438" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a47d3dbc282682ad1677b94a175f6741" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b1017edadb24a6d36203c3712077e1fa" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "823051c2abe2c39eaf7081e0088409ac" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c023c35f7ad765a0888445899764d0f7" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a8fff4802381c846c18084f9c4e8471c" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2fe10e4c799b034a8b1876121a891b3a" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cfd458e4a39ab69c8e862055da856dc6" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "939addd40f0c5f02818df711aa5e9d54" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "8bdc61b0f8acd712ea359d86b24e1968" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "33948650bd6c848399e709279bce1f11" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "2f2969bdfed5cba5af00f16f69d78976" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2c6d541d1d3a26221452e5c4bb83528d" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fe54f44b419b5f7bd3bc54aab558dfe8" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a2953aa50f7a97dbca7e08d0fa302b56" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "32aade19740d8eeefada5b20e442ea39" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "8e18f21b6aef5eebfa039dc12ceade4b" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "9a44675e18af0010def34e39676917df" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "5ae1542b61d0a287d933f36affe026ea" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5280a9eb615dad44b3369988cf74c326" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "6a7864ff1b0ebc92152990b9687b9dc7" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "69bc456d0c9b6acb41d1d6204a952a04" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "4b102ac082934c125d428369d0c941b0" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "ac38830c882618d58279eb2d17c0b847" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "967d789d704139ff390c4af07d777dce" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "b0a969cc2fcf9e66e10f560321562be4" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "ed524df3858e4b312a4f674c3d37c37f" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "e4648f35440cd4bb8b182f11077605a4" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "53dd5a339bc5b82030840e865bea16ff" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "9729372cc6f887e619300ad72aff63ab" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "9dcbef92598a692557908883c9caf9b6" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "423f9dcfcf2d8cf6d2c0d1b5bc3e8042" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d19d175441d3e379fbc6975b552d6563" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "0f905075378454f091d25d3e1be40335" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "b3e3e67e331e689fa524583b6b539e3e" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "6335c2f56a52067d54ea4005922a0a51" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "d10aeb2ede749393ec1d35300a7b2d38" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "43e847852826ccffcb64e48b3deb08ad" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "f09f9fc0c51679480931abb966f2de76" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d5e5f163290530b7ac8d8b49ac535eb1" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "e5723a941813983bacaa930a7e436b56" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "2f45eb5ef94bd9c394948d4253718b30" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "082dbd406a27ffc2764078255c5796df" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "6e78d1a47d5b9054512f24be728117d8" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "bb2e67ec9e635552b66ee9a5203ef607" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "84431d1994daab9df464b876ca251056" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "bd506c35068abe02df32c15165baa2e2" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "3edfee91d3d175eba7e7ecc9e779856c" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "fe91a98ce3664d028e81c701ab8643e5" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "8704bed8be7c51080ec71250a68e28a6" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5415f7204e0b877a39a62a2bcd3ce023" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "b84d359788b81d0ea451f03da6df146f" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "729c8a1a7ad6121ff6c342d8db9e2831" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "3eb7d87acc331959ef3de169e897e40a" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "00346000e65399ca408b376e0d589b51" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "4fe7a96a58fd2f0d137b2e7accf48d13" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f7525b9a8b6499ae94f612001a60d666" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1830ebd4939e5ede49db7e1cbae19ac8" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "bf3df986b47ba7949bcf434717656b8f" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "3830f6d13ce585a7b31faf54af2710e8" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "c0cd1af357694d9fdbd270f062fb0bb6" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "f0a71ab1ea94cae956e14f87d51e393f" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e260052e1cba83b60998a99f099ca8c1" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 26927104, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26894336 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26910720 } ], "md5sum": "e8c3d2080f4d9200359117d8b8950d6a" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d892481e8fbdf0b644a9dfafa1237606" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "466778a2425f6304f81b0e7ece762b0a" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "8c9a34e81e8a8ca270d5a8a2e6969954" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "6ca5e626f18ab2eb034604716ef28e16" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2f686831e2a29248bf6917cb7d6f89d2" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c0141da5f6d605d8f81876a3dfc1aa3a" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "120cc1955da8583293bb3964e3444550" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "7f124516393869e2e3a1cf9d4b00da14" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "9a0cb8394e377b59d352b143f262af26" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "96464b067bc5113df6e99aae8dbbb800" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "5914c4576be9d1ce5449f83ef6f0d581" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "130935f8515181784dfc486dbbf94c5e" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5f7da79f72f32e5a63b3e65ade50a91f" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "4b1a617c8fc2484706d13a56a0cef68c" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "891e699e0b4cc457cf40d2fca0c8e6f6" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "29476a9fec297dd6d9840cd0995cd599" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 3358720 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 7557120 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 10915840 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 10932224 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22679552 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "99eab82dfa6467dda3881edae4f623d1" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "42035b12686cfedba5b372c293bc6fe8" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "672b894da9e778b7a6c4cf1a9b12ca42" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "29d94554bfdcc61fc09abd0ce1d86fe4" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0f657b6bc71fa623261de83e27d1bdc0" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d0559e6b39b6d8527fc78d4c65a33482" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f71c209c887020c84156c2d37e96f4be" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "b0c8901321e7adc387aacd65d3d0b7ca" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "bb3e9f74721367663852b670b9548685" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19320832 } ], "md5sum": "354f0a90017c97d1c1595c544ec613f2" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "b804a66723d8d1e4d72ef5b42e9e4c8c" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "ca9dbf42fddbe53d621803e7877e72b1" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d7eef102d3f7a52ce96f72c22ba6aabe" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "fbc97e73512e693b0668330e0d44c63e" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cf35cd5bd8aebaf1e087c382265bd910" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "c13eb6f277252a752831f2c0b6c5dbd7" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "3d37976b021fa32d3a7f78f743154e5f" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 22745088, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3375104 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3391488 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3407872 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15155200 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15171584 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19369984 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22728704 } ], "md5sum": "6028e77b7e79a9030a06c2a93d4bbf98" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7b49c6b7cce3006fe0c1e94e143e9e05" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2ad9036b5d409ab3841b69a5589f4c62" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "0a15b0f1076f11ea17dae944c1d38dfc" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "ba8b350781be7cbef6b13e8bc0d625ac" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c5a47acf1055aa7682c651463c1816be" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f1078ed1d6de9a20b5583d3de00af37c" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "8c854c1858ee6254add8f9130848e028" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "4a6dea1bd3d08e41d670b145fc1c19de" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "cb5394e9177bef396273ed9bc8a9f560" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "9afc93e010c5608d36500016194f9777" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "67616d2d4e7c5b22c6c1f6fa4329f9f3" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "dd85e014d079911688748196eb9392c5" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "62f6e25c90db2f99d663628d28721752" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "a47b6fe8b9e9f7ba02d84b5110b4dc18" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "fb03df64c7158a1603419f4f4a0a283c" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "9e6246e0e6e4fe6b93f58c2590abc5af" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "9947c5e97d08af1b48a7ba69526f2ed5" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "697cd5bbf114a78dd9e651124a94321f" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2b734de865f521ce9d20a26d883be06e" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4c64257f8995022c1a18a36d12df6b5b" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "617ac5ec726a9e4b4bbbdb90e53b791a" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "bc93a301962fb504c49cac8a436b5337" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "aa5fac54d6442e81d6ae668d8c2ad632" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "3628011d6eb20700ae0374ef393b9086" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "100d99f7360181b7d362299f27ee9680" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b0835353e4b7f23a789bef19649ebaf5" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "ba62d2162acb8f97d23dd00bab6a5d24" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2ae359739a6a54f89b188d3edca7340d" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "26cd0282a3ff76f19cef898aba8d00f9" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f7629ff22a0016e91380261d7af4d6cd" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "2f1c8e3b532830a05da81740c2e3a03d" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "065003048cd93eda86ca0a88ca599f79" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "642b4dfe466577842be3756d7fe23fc4" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "5935479589d0f714899721858b09c22b" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "24cc9f8253da15ae3cc5430b607f1318" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "6f6928b3ef439d7992ba7b5f2d4b4851" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "99ab880d5207d01705b8f9a915752cc8" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "56c80fbd5ef18ad25e02b33a96e1ea20" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "89750bfcd55db578f0f2f23053673628" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "81c69e572e955f3db33e6f046684f061" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "5a51ba5e87f1cc86eb27c61e3ddd119c" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "bf4fa88511d6899b206ab3cd38b2e984" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "7147cc80b8dca52a62ba3aae8a304588" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "fd27a15e337d9fb0c67a7fdc00b1890e" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "f193b17ddbddfe3f355cf3fbe21ab6bf" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "06816a76862e70380046717467428e01" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "d6300111346c9cf1490f0c3f16fa55b0" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "9f86a764a915359c87d3260a752faac7" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "6e79a6fac4e4f3626a92fdb6dec3c12d" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "332006abc36f8a4b3ac81c9b63b8334a" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "6d6b476164aeaf6b43b2a25a413d0171" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "da90f9272a6c3fc705f8b64fec65d9ae" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "adbcb0c20de29e267fe14cb79d449685" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "845b1e1800c0ddd180b384dc39d36bfe" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "7c85c844a8166a0c0d0590d3b30ef8fc" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2423d22579c6e247f8ca145b7c36c32e" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "52c1a5c0c88b888606b6298b928a8bb0" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "8a2c9a621ae118e839d9c83c619a998f" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fea492f01374fd5184834da541913fd8" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0d89231a167792fb430215e05c902b76" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "18c0ad28bd2322f63cb20d2cbb209605" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "65b81a7bbcde74b4012bcc2f2816a728" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a5537476b070c560efff291bed207573" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "670c027fd6e33a536095bb11428a5536" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4f9583f27309e77a231056272ffaabff" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1daeb9b2f2df7586724f3257077f8f08" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "9fa7d67eb7f141b333f3b7726c374fe6" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "80e63d33784b6e061153fc217d0ec2e7" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "a37a6e215beee6a2e667359680b29b88" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "4370c163452b4aeb32ca917464bf22df" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "fc3bca1a5e165c34fc67e6bce9630b12" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "b97ce0ec91f1e37a3d007339c445f8fb" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "80dbe6ec467435f3ceb7440897e52333" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e868c8037b0741f9859225dcbd61e52d" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "fe50c870f5c2e369b0fa1f31b50a7a3e" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4051cbb9cebbf4b132699f92bda92adb" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "2cd2d4027ade7f7bc7900de94d98c350" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "66131341a5c1a3243d41172c824fb71b" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "bcb126e76515768d65cd754969b539b5" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "27d3472015561af4ef4a6d0a0fd89d9b" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "27f0d56babad6f9424dbea2abc802f01" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 19304448, "records": [ { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 } ], "md5sum": "6b4326e1f36ff06bcb5b98005fbf72a2" } ] }