{ "metadata": { "ParamSize": 325, "ParamBytes": 4077395968.0, "BitsPerParam": 4.500422426883416 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "lm_head.q_weight", "shape": [ 32768, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "63492edb5f1451e10d4bdfccbac98766" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4ac25b77fdd99286a08817d73d488d3e" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d670c55318b92e1ff8fa08a26b992522" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c1256b72782889a9051df73d1e7baa8f" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9109edd61a1c8e156a9be90f34ebe27c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 30441472, "records": [ { "name": "lm_head.q_scale", "shape": [ 32768, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8396800 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12066816 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19406848 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19415040 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19423232 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 23093248 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30433280 } ], "md5sum": "33ca089943e0040ab0b68409af741507" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5adbe20af3c07b6f701007b824b36bce" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cd720b6ef0a329134cac06d1588299b6" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "cddd7abb00bb443dda7a9dd893f1aecf" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "045581db4618cc829be433a6d136b4ca" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "ce12a5f29e6606cdbcffa5396d63cbb8" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "76d68f1de445b97b3ab83bd5fdb29580" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "ce2a1139cb7fb27d9ee345291f0392ed" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2019d084fac7f64482b17138b26c5138" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f59ecd9536685015ea3b97743a0a9725" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "30b3c0187e20dd441392b4e10817ac81" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e6b0f2e75e87f022aa67e13fbbc87f77" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "e74ca5e75a8928789768864b97232bc1" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "950901b7f5fe4d3f7d88e0c4fa403ee4" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f796aebb734c2668a02acff2bfe2c90f" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "0c58de295a84ec96ffc4d026a62e65f5" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "097e2b63ae7fbe8c1828cd30a71abc8d" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "81f27349521f720ae7ae373d9b49243f" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "dbf29bd2948e38da78ee0b8de130018b" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "da7c1f49ad38be0de94cb525ff4cd55d" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "9ff885b68c317437eb8cd4345cb74947" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "c6bd64d3849940d56a5f2783becca879" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d949ac9cd6f8a9f7bfbfd58b6d67ccf0" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "854b28c1a974eb2a399e22eef842ad9c" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f2232bbd03d0412e69f665146dc33a66" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "66ca10c148f2bac9c87c48149e42f7b6" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "9d32aae03aa99d66beec6b5f4430874f" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32768, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d368f1d9c3ca9b7098d7f09b8f06efa2" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7800212a8c5f4da667a905dd2bafd60d" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31997952, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32768, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 23601152 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31989760 } ], "md5sum": "62b7891c4bd8f4ad3bf9678474ef4232" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1fd16ed6f1019a5a64f40002e3ec2f00" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "17f4a3fe85b11f70a536ca392da6b0b0" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "484034d92331f8c91a824fe3ce1716b9" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "75f3d6ab3e1a5b5d818a150429eaebb2" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "52f25fe879be75743961e87d7830e814" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "85d0bc75512415b4d251321d53a466d6" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 32505856, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11010048 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 18350080 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 30932992 } ], "md5sum": "d032ee1d23e426d5df8bca8f42865f21" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6b758f511f62b4b06ecc7eea648a402b" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "39b5e898f3dd0e6639de318f58f1b663" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "0ebee16c506c719a043f8508e0286aa5" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "94df2986712232288ea15dc24698f2d1" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c5786b67dee764a239277c3e65d772ac" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "4068c7b01bfb810df75e65ffd412eb30" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "448c61d4a3cc249877b5b78b2474865f" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bdf3b353affa0f52e7e35f1141b8e957" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "15a23ac38e797e5097038381e0f0df70" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dbc694b7c1cfa3722e4aefa46a5674e7" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "2cee1ad1df2bf14765043123e3eedf73" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7efbfa856bb936b9b5a16ea86405e0a5" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "44ec7c40f0e996a16d001f98b53a8044" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b91dfc726ab9bf240fd55519277db66c" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "875fca78143f2f08a029be2f5f284eaf" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "90bc458f814d531388c936aaaeee115b" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b711532bae5d442e77136f2b07b153fd" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7eec3ba8067879dca6cfd0ede3f37bb9" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "6795002ed371a06dcac3c6ef103c2956" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "29be0ab2ec667cdcd6215e6669cc92f0" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6b70776bbf219490bbcad3d624125c06" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "736fdf1d87cb1fce592a0f9e02c7ef6b" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "77c5eca1e9339764505dc9ae18aa0c2b" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "985f2fc4d455ba53e8b10b14a0f7b390" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "80f30b4f0789cf8b739b5c104b2cca42" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "3df80ed996fad2a3a5f25bcfe3db7f49" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "186ff5f83d6a89f163b08a430fb5749c" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3b3d860b2325341016a0aba683d227cd" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fb3694a8b2bbb2840614821e98513ab1" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13115392 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13123584 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13131776 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 16801792 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24141824 } ], "md5sum": "9f5dd34aba23caf62e5a44505b88915c" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bdba425add54b2ad6935dca2fc3f317d" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6030d94dfac0369afe1a2f7bd14b5ea0" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "94663bd52cc9bf0c9c6923bfc294a8a2" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "73e2b769f22c0e87111d53f0350f5184" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "db6c71fcc103c7491ea86e951d606df1" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "18b9c7644caca3032a3f49e742ffb699" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "465fdfba8d07b5a0bdd8639567199afc" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7fae165da931f5b305d507b76c79e391" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "5d63159436414bd9f169bc6c48f4a483" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "1d20dfb3ff02ac9a6e60ff5f693d0298" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "02ec4de5c7e56b3c53214f453d750985" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "97141640fe963e3903ed9d061354a8cb" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "f909ac37d90c9a520514fac9b826f408" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8f330830df9e7424b29e9e85815e2d5b" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "73074848805b0ce18f45a96ea6f5f306" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "4ec4eb37bdb3052d6048560efac513a1" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "526392f8c997a3ab9b205ad5e27e57cb" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "43177bf81ee5f4a9ece08f004433e4d7" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d04893a2388c752adc03b842a86257fd" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "acfc6354c9e5df9fc9e6169209b8593d" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8370dd499a26af1c922a94006daff934" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2ac2cfc87e04bd47439300bf8fa7d7eb" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "b1c5985b0b684f0be44b93b7703fe3e1" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2b3b9304f3b89aefc3caec884bd15e27" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "670d4c07b07cfc274d680209a04803c8" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "51164b6764e412eee8e690d9ae3c3fe7" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "360731e4c9ecd21aba27e99342d01c49" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "59cc33bb70c188ab14bed0b2cf6e8ba7" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "b85f8c141bc88aa9c8981b0cf1965997" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a028fc7737a02cdc0dd3ffcdcceae07e" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "2b0b5f9982a913f763113485250a900e" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "ee88af691e911deb4d7a3ab4e5511044" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "01aefc662974d904f2a98c48aec43cc5" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 9437184 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 22020096 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 23592960 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31981568 } ], "md5sum": "98773e1d5d9977989799a8613f87f498" } ] }