{ "metadata": { "ParamSize": 283, "ParamBytes": 1656834048.0, "BitsPerParam": 4.069857841273854 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 197001216, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 197001216, "byteOffset": 0 } ], "md5sum": "2b3bbec9523c45b6f2f7b3cdf47963f2" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c716b9d3ba2ed3a9d7826b6c6952c4a1" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 32913408, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6156288, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 6156288 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 6162432 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 18745344 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 19138560 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19924992 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19931136 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 27795456 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28041216 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 32759808 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32907264 } ], "md5sum": "888f83ec1226b13b841c813c5c09b6b9" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "991777affc0049173f7360d92186e17a" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "69fc4d3c7d15657eb141f540b5213b89" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2c574acf9b2789043ade9a0431a92d37" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "0fd9a9bab057351a2c6ac9340c2dad99" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8bc99606e50b0836fb0494e45c83a043" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "5a024a4a88df248d5fb7d7fb4d86ac6e" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "49c9cad5a5f461f7a687895acf6111af" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "5a4b8a15fc93a6ace650d74501f20bf3" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8e57da701f2bcd9fd959e4920abcf776" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "c0f3c4cab26f6d93ff7d52f6925b8f53" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2b096448a45486c8931545b03d534f31" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "adf47ae11dc35ba87e757e3f9e7da73d" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a049c32525bce97d3c77e8d8abbe08f1" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "8621fc4bfb8194e230c9880ba3deef2d" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "930370c0da11c6f44ea33a94fab10929" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "dab0a09235390adaf18ee12e96ff8399" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7c627feb0b5db22e95d4ec3c4306b01e" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "bc307ff64750d2ec5a5f3ff13c3e2fae" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "79784b78745773537248ba5314f0398f" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "64d856457a5ab6c315742737d1880c53" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a787639de2cf23c496545826ebe27b86" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "7c1456f73af0c8e3fca55a9b70fe177f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e2d9009233b70c566ba5bd0c55a4d886" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c252de9f266a9e21b4e6b40de0c6c06d" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 27531264, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 26744832 } ], "md5sum": "85df326ecd8cbd7b86864e21f5a289eb" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2069436afa758ab8fd6ace8cd4804768" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "e979e304a3221635bd87046b2f835356" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bf17c6fac518de21e2efe5da6fbfa26b" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "29bbd31ed85aae6e7a49d50dfbb42744" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "681251fbf38db6f2c4741e53600e129d" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "09a3e0633e43eebec3cdbb59249d3694" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9472daaa7a2bdaab2f124348f93831c0" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "9a2982b5204d8cfab99bcd0841d0ce30" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c678ab8dcf213457ba27de10b4ceb6bf" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "f75ca59485358baf9146c5bce531368b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "43ee5f8e124068e91ccc353fcf0f74db" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "6bbce9c5f5b22ad77b4f5407c0da5a01" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1d8e4a9edc739ab0d25720cae898fd56" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "b942839c23fb6d7d1f263af84ee832f7" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25970688, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25958400 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25964544 } ], "md5sum": "d657c97de1b0757bbb4addf251092cc7" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8e521ec60439fa16b8d3db610f9926d5" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "98f842bc28a8ce8e3d13bb41e667aab7" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8597f06e2e9b60b7f0179bb40a438931" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "6bd037b851470a97b6c5cc067a3adbc4" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "462e5a438bfeb73b6dce164fc37d917a" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "1d3c82d7f05589ca1762cecf9426c597" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "319b2b37ffe2ed7f4527a6931d87e203" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "7ee0d477f6e48b83337ab4910512dda0" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3d7045bf30546b24caaf68a626df981b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "aee2c6bcbd86394401687d9a424b2317" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "79f0409057bad7087fdea7761b118cd1" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "01ccb2a362d3bab420dfdf7acb658eb8" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2fcea99c82dfa6fcc91ada16ed7c0696" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 64, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 24, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 384, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 24, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 24, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "e631deec96d178bbd0e15c1208ebc7b1" } ] }