{ "metadata": { "ParamSize": 243, "ParamBytes": 863408128.0, "BitsPerParam": 4.036087612624202 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 49152, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6e0db1a93d993cffb7aaf8a9c1d4ce7c" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 31502336, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 49152, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 6291456 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 6295552 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 14684160 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 14688256 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 31465472 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31498240 } ], "md5sum": "99e7ce954ccffec1675e26c93216c2fd" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "b78a73072062b183c83026bb8404ced5" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25247744, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 6291456 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 6303744 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8400896 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8404992 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8409088 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16797696 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 16801792 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16834560 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16838656 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23130112 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23142400 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25239552 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25243648 } ], "md5sum": "61a3da5bc7b8cb881acf49a479c2beb0" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 31510528, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8392704 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 25169920 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25202688 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25206784 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31498240 } ], "md5sum": "30c3b73941a80055640b4303272babc1" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 27312128, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2097152 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2101248 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 2105344 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 10493952 }, { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 10498048 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 27275264 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27308032 } ], "md5sum": "c611617a404e265cf5014e20c93edbe4" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "378c98e7fc217fcd6537270e0f8207e8" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25247744, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 6291456 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 6303744 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8400896 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8404992 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8409088 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16797696 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 16801792 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16834560 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16838656 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23130112 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23142400 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25239552 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25243648 } ], "md5sum": "4ba882f42034c68bab24c8166a445a6d" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 31510528, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8392704 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 25169920 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25202688 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25206784 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31498240 } ], "md5sum": "5f2a267c4e9ede33dbe13802de268f6f" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 27312128, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2097152 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2101248 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 2105344 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 10493952 }, { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 10498048 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 27275264 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27308032 } ], "md5sum": "f86aa9386d753591e3f67c6a33d68c0f" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "bffe95b456369085cab1ee348d6a670d" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25247744, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 6291456 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 6303744 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8400896 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8404992 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8409088 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16797696 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 16801792 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16834560 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16838656 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23130112 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23142400 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25239552 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25243648 } ], "md5sum": "eed78fbf13c52b5cd02f9cd72743120a" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 31510528, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8392704 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 25169920 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25202688 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25206784 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31498240 } ], "md5sum": "8ab60047c335cc14acd349b8483df474" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 27312128, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2097152 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2101248 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 2105344 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 10493952 }, { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 10498048 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 27275264 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27308032 } ], "md5sum": "65ad69e45d0c1efcac8cff77764a1aae" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "3ed61947a44f9b92ae24a3fcb89ba516" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25247744, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 6291456 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 6303744 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8400896 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8404992 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8409088 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16797696 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 16801792 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16834560 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16838656 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23130112 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23142400 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25239552 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25243648 } ], "md5sum": "8c747b95f1612f48e2636cc78849c66a" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 31510528, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8392704 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 25169920 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25202688 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25206784 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31498240 } ], "md5sum": "a8c56a6776ed56399e6834a9b288a48c" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 27312128, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2097152 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2101248 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 2105344 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 10493952 }, { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 10498048 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 27275264 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27308032 } ], "md5sum": "70ef616fcc72fd9aaf7b66a1a37a590f" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "9c6d8615e8e3110b2dc7dfccac7c9ebe" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25247744, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 6291456 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 6303744 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8400896 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8404992 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8409088 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16797696 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 16801792 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16834560 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16838656 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23130112 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23142400 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25239552 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25243648 } ], "md5sum": "063802cd3c3314db8ba92c47859389bf" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 31510528, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8392704 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 25169920 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25202688 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25206784 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31498240 } ], "md5sum": "8a9c92d10dc52ed63f0537e0f0686bb9" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 27312128, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2097152 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2101248 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 2105344 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 10493952 }, { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 10498048 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 27275264 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27308032 } ], "md5sum": "72f7469ab1aafb6491fe2d0b5e4d9fca" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "107f0909d8d889f4885bd9c7cd3289c0" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25247744, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 6291456 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 6303744 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8400896 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8404992 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8409088 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16797696 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 16801792 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16834560 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16838656 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23130112 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23142400 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25239552 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25243648 } ], "md5sum": "19629507324f20913f67ba7619cfb59c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 31510528, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8392704 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 25169920 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25202688 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25206784 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31498240 } ], "md5sum": "c133bf74a986aeb9c4ee1df0bc8e3df1" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 27312128, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2097152 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2101248 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 2105344 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 10493952 }, { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 10498048 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 27275264 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27308032 } ], "md5sum": "e98ae57ab3ee099b45efeaaa7f9f063f" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "ba7d70e9ab37fa9d5121dbc45c3cf5a5" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25247744, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 6291456 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 6303744 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8400896 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8404992 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8409088 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16797696 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 16801792 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16834560 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16838656 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23130112 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23142400 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25239552 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25243648 } ], "md5sum": "405bfe03c21c60ce2f5cd357d6750d5f" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 31510528, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8392704 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 25169920 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25202688 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25206784 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31498240 } ], "md5sum": "2f50210d633e38445f2aa2b8ea3ec335" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 27312128, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2097152 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2101248 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 2105344 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 10493952 }, { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 10498048 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 27275264 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27308032 } ], "md5sum": "b9ce099aeedcec1b447b220fc6ce5f0e" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "51739ca23ef2c9c8f2306fdfda84620d" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 25247744, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 6291456 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 6303744 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8400896 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8404992 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8409088 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16797696 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 16801792 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 16834560 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16838656 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23130112 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23142400 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25239552 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25243648 } ], "md5sum": "bc80efda63f78fe5d3c1cba9c0d4cb72" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 31510528, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 2048, 8192 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8392704 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 1, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32768, "byteOffset": 25169920 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25202688 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 2048, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25206784 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31498240 } ], "md5sum": "5e9668e4b64d4804074f24de535e7cd1" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 2105344, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2048, 1024 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2097152 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 2101248 } ], "md5sum": "b7c034b012032ddf236bcb697e86efe6" } ] }