{ "metadata": { "ParamSize": 325, "ParamBytes": 4398260224.0, "BitsPerParam": 4.500391607087381 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 209715200, "records": [ { "name": "lm_head.q_weight", "shape": [ 102400, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 209715200, "byteOffset": 0 } ], "md5sum": "90f029e66e6fbd893ff5310a812e176d" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 209715200, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 102400, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 209715200, "byteOffset": 0 } ], "md5sum": "001c16b21802858d85d6c93306f4cb4f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 102400, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "69e707a7767a4527aed058e580d0224f" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6d28f498ab68225ace0e63827785dcec" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3f835557a10e6d75c95341efff1c80fc" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29892608, "records": [ { "name": "lm_head.q_scale", "shape": [ 102400, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 26214400, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26214400 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26222592 } ], "md5sum": "3f2af91787e153b78e22580d5de72fa8" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0ff00c3f3eb21c561791ee95e26b8170" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "4d58ca9cca916f01a7248a3c1c38607f" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "051fb6f8c8ef5e1d3bb917f7c9e3cda8" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "219100119c0c9cfb87c20b4be8f5cf72" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cae2edb6e21a42e53b2586f945c2bf49" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "dc48ca496ba07bce48619df41996c9d1" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "607ec87379590bf8c638126f6e0510f2" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2a98604805dd39c4fd9980c0bad05ac8" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "09acfcf01c95bd66a7ba961bf53da034" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "96793fc5243200faa185d0daab325001" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "054868dd6f22d7a4a6ea0e8a6ee49c85" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9787ab1bb42ef8a24c777488cc8903e4" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "147bd9dd8a97fa53c0245eca04048efd" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f97ca2231642269fdbec9b977a49bfc7" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "7eefe7475e664c4d953789370e276889" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "479ec65c68c1b0afe1380f969062b06b" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "5e7c84410be547f6f0fe83b3c5a88f34" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7655698dc0f22689caab014586a1e422" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1cefe11e74c15cd6b5b84934aa5a54d4" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "6ee1792e5bf037b9f3eac45fcd9039c4" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2357dcfd2b540739bab18d9c02575429" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9c92182893ca24c4cc9b1fb40c8ff31a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "360cd3270c824f69a1ea1633743dd5e5" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cd99013a5e6a759e06607054fb93f22d" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "97de6e6f1f6b3079471cd02d24193336" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "96c70667e4e017d2b0585b080e446870" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c73fbdecc399b9239b2775b681faabf9" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 } ], "md5sum": "5983cc8cd4f12770381f304821200ef6" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ada6bf6e8920e719725bfdc297218472" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7340032 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19922944 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21495808 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29884416 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30932992 } ], "md5sum": "ce41e9b936e4f2602aca40c7d49229db" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6ee6612310c1ba5d1e54850b09a5e880" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "b72f95b66bb2a7657b585ff9d6d60f25" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "da11b6867b38938c6ea2b25627ca452c" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "061726c8f8b293fa4da8ec703c8cc3e8" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "4b696121679f8dde9dc3c1565a34b480" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "22e0f08e2bbadcac212a2e84cec7c2fe" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "881b0726ece9a8c3a8f5fb6829cae14d" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "cc9b4c71fd11e5a267497ff7d9b41021" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8e23dfbc6f10f2e57410e707452d971d" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7615c7185c5e1c94bf18236e29aa5c88" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "c021407ce5478b8e4d3f17217265727f" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1d9079bd809a7e2205fa32c33ef47789" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "a44b5ff943733004688cc08949da25d5" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bee354b36c130b38e93c419240b1f6f2" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "274a2ba97326df45510550ff709f4755" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1a6cea0e6f5b221fe305111b8bbd8999" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9f1a6b2a1abfc6f2cab9bed6eab4f7dd" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "feaac15bc7c2e7c7f48ea36ecc194ea8" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b397a1786cd0c34ee5114afa9f2528ef" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bf6846f3ce3fe86781c9e78886db6b5f" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "0f7a17f0e0183779bc3fe1294fd40600" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6a25f82e5e758e0c0ee2cd548a8b1848" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "24cfcfcd696811f18f39cb3718c29ebd" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "01f5e1944ea7183612d149a29b523bd0" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2b047aa802ad75829b7b885b9cb09196" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "46f830c41107e0832cdd63be9d009f54" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d658a99ba9f5d296c11931bbd0a5b267" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "a52c05d9cc8f5decec8b2c80a7ce249e" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4c3f768011fbca41fa69bbfe15f16ab1" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b454d8a70851681d73886ca56ec4d02b" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "05c446c77f11095c51e9dee68cc19f73" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "66411858c8984ec361fa4b94209253e9" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 32505856, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11010048 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 18350080 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 30932992 } ], "md5sum": "d6d14717d0ddf7145ac375ec304c2b68" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a4baed7724131a83f258ce76a4c29cb4" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f99ef65ae7fc5deea2c5d0383911e9e4" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "175e0a9f767c9bbd428141d4a186ca3d" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c23700f4950f9cfa91757ec90a3fb9e1" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 27836416, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13115392 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13123584 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13131776 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16801792 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16809984 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 16818176 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 20488192 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27828224 } ], "md5sum": "3887537a2ef6242f2d9508bc58f1dcb3" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6c6135782129a79b3e33ccf26487dc2b" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7004c90e384e916b2379d5a7181dbcbd" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "26a4cfedec6303aa24aa10300c88720b" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bc2febc0970cb1d80ba266a8fd5001b9" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "30b6629d8d10f68f2943b8e058a5c2c9" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "38bd32343c962bfd32978edca47624ef" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "54741c57aee7698f316b3836dbb1473f" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0e184c20d0f928cea3c6c40a31b548b4" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "973b62005086dba9cc8b55946a3a5731" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "37db3e46fb68429c8c65712f7263a5ae" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "47de7e3dc8e2903804db9ec5bdf9a41b" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "246b80fc44706057964b2cab7ba34fc0" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "252f6d359d891cb1fc199ca893eeec63" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "46fd70c1eea03363947cce86051ca6bc" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1743432946d4e03356bade7ad88fe68f" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "fd276f189721afbe08d7acad3bb25f78" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9d1db7d97c1a3e9a07bc8b7a8bf6babf" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "8222e075028cd3c1213ea15c0c338d0e" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "10a71e8adf7614782ac1ed988e7f49d3" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "b6a5bbdf94b60b0fd5ff6d9b6d3892c3" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "288d4b3b01af0e8753f5fd16ad80d27b" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3e044a8727509b651cfd747b533cd65e" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33046528, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9445376 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13115392 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20455424 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 20463616 } ], "md5sum": "6b09af9918ca8eb2962dbcd4f3c81d70" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "40680a70888b7d244138d08f54a77acb" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "83342aac80b2e7a011b29c8e6d09b83c" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 22036480, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 1572864 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9961472 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11018240 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14688256 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22028288 } ], "md5sum": "5f94fb646d95912805fa73bbccd800e9" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1d9f83e4315a84c1b9e0915949dc599d" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2b672e6f4a5627b5869823679ffd1d0a" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 27271168, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 14155776 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22544384 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23592960 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23601152 } ], "md5sum": "40ce0e0593af70e0374b5d0fd7593427" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5f1d65d1b2305bbc4b27fd81c018a77b" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 30949376, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7340032 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 7348224 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 19931136 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 21504000 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29892608 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30941184 } ], "md5sum": "1556153fdd5b518eca75f6c0af38bdf5" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "75e5c6dae590b319e7f0cf0147f5602d" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 0 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 3670016 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11010048 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 11018240 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 23601152 } ], "md5sum": "d2de70fe6684efbecf04ea07f1907f6e" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 9445376, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 } ], "md5sum": "a98a952fca33b8eee3dfc9baeaf2c563" } ] }