diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,2905 @@ +{ + "metadata": { + "ParamSize": 243, + "ParamBytes": 863408128.0, + "BitsPerParam": 4.036087612624202 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 49152, + 256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "6e0db1a93d993cffb7aaf8a9c1d4ce7c" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 31502336, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 49152, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 6291456 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 6295552 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 14684160 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 14688256 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 31465472 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 31498240 + } + ], + "md5sum": "99e7ce954ccffec1675e26c93216c2fd" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "b78a73072062b183c83026bb8404ced5" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 25247744, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 6291456 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 6303744 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8400896 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8404992 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8409088 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16797696 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 16801792 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16834560 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16838656 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23130112 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23142400 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25239552 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25243648 + } + ], + "md5sum": "61a3da5bc7b8cb881acf49a479c2beb0" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 31510528, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 8392704 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 25169920 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25202688 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25206784 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31498240 + } + ], + "md5sum": "30c3b73941a80055640b4303272babc1" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 27312128, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2097152 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2101248 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 2105344 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 10493952 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 10498048 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 27275264 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27308032 + } + ], + "md5sum": "c611617a404e265cf5014e20c93edbe4" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "378c98e7fc217fcd6537270e0f8207e8" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 25247744, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 6291456 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 6303744 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8400896 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8404992 + }, + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8409088 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16797696 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 16801792 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16834560 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16838656 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23130112 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23142400 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25239552 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25243648 + } + ], + "md5sum": "4ba882f42034c68bab24c8166a445a6d" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 31510528, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 8392704 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 25169920 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25202688 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25206784 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31498240 + } + ], + "md5sum": "5f2a267c4e9ede33dbe13802de268f6f" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 27312128, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2097152 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2101248 + }, + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 2105344 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 10493952 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 10498048 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 27275264 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27308032 + } + ], + "md5sum": "f86aa9386d753591e3f67c6a33d68c0f" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "bffe95b456369085cab1ee348d6a670d" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 25247744, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 6291456 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 6303744 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8400896 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8404992 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8409088 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16797696 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 16801792 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16834560 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16838656 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23130112 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23142400 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25239552 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25243648 + } + ], + "md5sum": "eed78fbf13c52b5cd02f9cd72743120a" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 31510528, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 8392704 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 25169920 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25202688 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25206784 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31498240 + } + ], + "md5sum": "8ab60047c335cc14acd349b8483df474" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 27312128, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2097152 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2101248 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 2105344 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 10493952 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 10498048 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 27275264 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27308032 + } + ], + "md5sum": "65ad69e45d0c1efcac8cff77764a1aae" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "3ed61947a44f9b92ae24a3fcb89ba516" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 25247744, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 6291456 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 6303744 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8400896 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8404992 + }, + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8409088 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16797696 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 16801792 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16834560 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16838656 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23130112 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23142400 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25239552 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25243648 + } + ], + "md5sum": "8c747b95f1612f48e2636cc78849c66a" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 31510528, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 8392704 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 25169920 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25202688 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25206784 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31498240 + } + ], + "md5sum": "a8c56a6776ed56399e6834a9b288a48c" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 27312128, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2097152 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2101248 + }, + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 2105344 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 10493952 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 10498048 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 27275264 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27308032 + } + ], + "md5sum": "70ef616fcc72fd9aaf7b66a1a37a590f" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "9c6d8615e8e3110b2dc7dfccac7c9ebe" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 25247744, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 6291456 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 6303744 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8400896 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8404992 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8409088 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16797696 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 16801792 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16834560 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16838656 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23130112 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23142400 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25239552 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25243648 + } + ], + "md5sum": "063802cd3c3314db8ba92c47859389bf" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 31510528, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 8392704 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 25169920 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25202688 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25206784 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31498240 + } + ], + "md5sum": "8a9c92d10dc52ed63f0537e0f0686bb9" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 27312128, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2097152 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2101248 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 2105344 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 10493952 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 10498048 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 27275264 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27308032 + } + ], + "md5sum": "72f7469ab1aafb6491fe2d0b5e4d9fca" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "107f0909d8d889f4885bd9c7cd3289c0" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25247744, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 6291456 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 6303744 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8400896 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8404992 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8409088 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16797696 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 16801792 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16834560 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16838656 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23130112 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23142400 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25239552 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25243648 + } + ], + "md5sum": "19629507324f20913f67ba7619cfb59c" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 31510528, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 8392704 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 25169920 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25202688 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25206784 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31498240 + } + ], + "md5sum": "c133bf74a986aeb9c4ee1df0bc8e3df1" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 27312128, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2097152 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2101248 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 2105344 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 10493952 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 10498048 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 27275264 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27308032 + } + ], + "md5sum": "e98ae57ab3ee099b45efeaaa7f9f063f" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "ba7d70e9ab37fa9d5121dbc45c3cf5a5" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 25247744, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 6291456 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 6303744 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8400896 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8404992 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8409088 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16797696 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 16801792 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16834560 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16838656 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23130112 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23142400 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25239552 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25243648 + } + ], + "md5sum": "405bfe03c21c60ce2f5cd357d6750d5f" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 31510528, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 8392704 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 25169920 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25202688 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25206784 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31498240 + } + ], + "md5sum": "2f50210d633e38445f2aa2b8ea3ec335" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 27312128, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2097152 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2101248 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 2105344 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 10493952 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 10498048 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 27275264 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 27308032 + } + ], + "md5sum": "b9ce099aeedcec1b447b220fc6ce5f0e" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "51739ca23ef2c9c8f2306fdfda84620d" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 25247744, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 6291456 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 6303744 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8400896 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8404992 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8409088 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16797696 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 16801792 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16834560 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16838656 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 23130112 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23142400 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25239552 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25243648 + } + ], + "md5sum": "bc80efda63f78fe5d3c1cba9c0d4cb72" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 31510528, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 8392704 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 1, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 32768, + "byteOffset": 25169920 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25202688 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 2048, + 3072 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25206784 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 1, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 31498240 + } + ], + "md5sum": "5e9668e4b64d4804074f24de535e7cd1" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 2105344, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 2048, + 1024 + ], + "dtype": "int8", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 1, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2097152 + }, + { + "name": "model.norm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 2101248 + } + ], + "md5sum": "b7c034b012032ddf236bcb697e86efe6" + } + ] +} \ No newline at end of file