diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4311 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 4140998656.0, + "BitsPerParam": 4.070120983102826 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 262668288, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 512, + 128256 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262668288, + "byteOffset": 0 + } + ], + "md5sum": "58bb3a884ba5f20edb72182486a45c0c" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "90243819b8fe4c6fef274410da882b85" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "0e20f69c3aa3a38db5d646ffbb7daca4" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 262668288, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128256, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262668288, + "byteOffset": 0 + } + ], + "md5sum": "607377f17782be27fff9c36ecab568a7" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7243bbb4827433d4e6106d8e27986e19" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "7318ca39cddcd98456b832c18512a7c0" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 21962752, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32, + 128256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8208384, + "byteOffset": 0 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8208384 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 8216576 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 9134080 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10969088 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10977280 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128256, + 32 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8208384, + "byteOffset": 10985472 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 19193856 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 19202048 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 20119552 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21954560 + } + ], + "md5sum": "8d84b4bf8292e0b48020ae75d4627622" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c0cc79e61de81c6e49956e1854003797" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "26ac4c4096da6980c9f61e86a1d3a009" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "9e90e43675446c51f2c58a9186ef7b04" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1d070fa7a5804938f2a8f8cbc474f672" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "5f7adb2cbce4fb050b421b6b8ca23767" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "a20d692709a226246d33c62a45574eed" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ab72611bf2bcee21fdb26bc0b1308486" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "17bc5c8a93422268fdae9bf1abcac526" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "5471c4a4c35aae66366e740a36dabafe" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d31530fd8966876b7e518bd67d3cf561" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "37b3526132f05dcfba8cdfdc91a12c05" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "50516261632bb66ca22f5652994fd04a" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1e0534a3f1335ee938e2204ddd900b43" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1cb6842bf82bd7b679020cac1b4f2173" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "0b72fefff9cf352bd245761c6d1045ae" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b7e0aa2d4157f4538218a713538d132c" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d87b43bd62f597bb04662b968f60e50c" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "168b4ceddf3b5a6422c4f858d806172b" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "44d91c05db86c646c0259049cf7dfe10" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "5357c7122720b56b9cb4e8dd654f73e4" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "c170a56fbf5f694aa84807e9dd225b3c" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "423df162ff3d0988dbdb2d4c10477592" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "62c34cb6c351f33ce84963f59894a79c" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 24379392, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21626880 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22544384 + } + ], + "md5sum": "9bdc292ce8ebfbcf0b776e4af4eb924c" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cdc89744cf51618bc2d2c397e079d575" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d6030415d833132dd2412c71962c67eb" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "5dfd27e45772cb6390032a5bb565b88b" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b9e411b7f54cf0b72e9c0b379b8c639c" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "509d82dd50e2d29064036974d748cf5a" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "6aa60e63b2c4a1d5f2f8f00cbe7cf6d9" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "87b28576884e3f82bb069da18be112f8" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "fe619178acb02dbd66a26185a9374d34" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "67d2ac7a06a388517e1617cd806134b6" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "58e2c378445a5bb86573bbb383f6b59a" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "793fbe09203720ff0a32236978fe3b45" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "d72f7a563b4f606ad9fe734db28dbaf9" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1573d752c094444c8ee3020c2bfd2d64" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "328fe0f8e71b1ed3571dfbe22c6dbf1b" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "799182ef2878a6f0d4cf0f07ce74890b" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1987242d1aab352ce14d8cf9347628a6" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "6117919abed755a17b10c9b3ae4c5da9" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "82e4a6eef1a96aa2086807fd81132c00" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ec97dea1ff6337102915242fa8b08df9" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "01fb2fbc4c81b8598c4e13cc9bbee85d" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "6fc0c8a9c0a711934a78e9a25e0384f3" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ec6155b8d36f33ef40ebd41360746b1a" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e436734af696586429d1ac9570212de0" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "2f41d0c327646c563b5bd4339a3c4f9b" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4498e0c810a1933a45e5235ae7c216e2" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "35a71d0875b53c1457f88e6a36e9b61a" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "ac4f058e0097fc6a976c378ba5a18a8d" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "40d100e5eeacd923ff128cf7c49df80c" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "b39bab36531eafb6a045f06b79c47baf" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "c9220b757eb443a6c3fa2f8e79719a64" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1a6531e32be93ffb0a16c8e662421b07" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "883c4e8595a012a0217061e044fa6e41" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 24412160, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21635072 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21643264 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21651456 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22568960 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24403968 + } + ], + "md5sum": "a25f2477f203a44a92231e8ad1544987" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c810399ae7f9fff8c4ceab3886f914e0" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "04c64e1d1f859c98db074c7e08478146" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "2d2b22527e575d70088d85a242ea701a" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8343132ecb815631e3dc8f1858c88fe7" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "88861fca414c77cf590686f2b5915e3f" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "641e904b8d66a6637c3c8e0ef341ff0f" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a413a4d03ec5f5ba16e8e9c3cec17578" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e51f3905faad924817e2819d09cdca46" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "90dbd35d6d1a8972b771b4142c20b7fb" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "874f4cb28d416b192d7942d23162f301" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "19089454abfbe216df343e574e920027" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "b6c160093e0836767b620a3921809ec6" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e1460d0a35c943eeb9f39da6dd6432e2" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "3eb69a2ea448ac946f49c5fb28a8cc9d" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "14fa4c2fad0f38e4308e601642fa25b2" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3ea0265d4c541bd4667b38543b715a1f" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "5d2fc23e515fdf8621ec4ad538f292df" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "14ab217322dd08ae4eced65ca68801ec" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6e988567397e19d1a928e5603f2cdc26" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "da8dbbc52c2a256f3a6dbc8c0ab494ad" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "7737f53720958b331796a63110be4bdd" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "46824b3072419c5eb6aa6df7afb20f2a" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "5f0aaabb23c0ebd403cb5e5fdd52a7b4" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "4dcd1725c7b27518f1604a95961ee722" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9e58df02f8ace797aa61a27770813ce3" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "826ef026b48fe3d26086943133c94f42" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "37ae4e30b99aa2bed59e30da9b431253" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f952ec8b6ac1956d5066ed0edd4665cb" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "ad2a43b352ff83582b858936076ab748" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "57016e31b3cc9e8662a90ee68ef272d7" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 1792, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "582b64f91d2330261926143753992e73" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 512, + 28672 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "dfeda00b7a35e0d92f8f6807d3b530df" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 24395776, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21626880 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 112, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 917504, + "byteOffset": 21635072 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 32, + 28672 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1835008, + "byteOffset": 22552576 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24387584 + } + ], + "md5sum": "0723789559983f3bac035a48c95363f0" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 21626880, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + } + ], + "md5sum": "1c4bb2c7be5a6585a265255f6ee4c72e" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 21626880, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 512, + 6144 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 32, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 512, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12976128 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 32, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144, + "byteOffset": 21364736 + } + ], + "md5sum": "ffa318082a9dfd644fea26106b9a62b8" + } + ] +} \ No newline at end of file