{ "metadata": { "ParamSize": 325, "ParamBytes": 3048549376.0, "BitsPerParam": 3.619307029695688 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 52736000, "records": [ { "name": "lm_head.q_weight", "shape": [ 32000, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52736000, "byteOffset": 0 } ], "md5sum": "c0676f03fc3b05f74cb6101f885dd778" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "51ca0626954f5d604646656cd7e01edb" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "3cd35eebd1bc345f5e97952df522af60" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 31492608, "records": [ { "name": "lm_head.q_scale", "shape": [ 32000, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6592000, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6592000 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 6600192 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 24688128 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 26949120 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31484416 } ], "md5sum": "a7196c7e313876e2597d3fb0c2b2e08a" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "c85477d1a834a038edcad6752c6eb49b" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "1eb8a5f5db8296800580f2a5fb9e1c91" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "0cbb2658052e3b7f7ce241e4d70c7a57" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "d22080dccf24c8f72eb91b129dd83c74" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "c5b08a532d49d67ba7b55789a4ac4ad9" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "14159b83f40fcb272b6c2fe98d142fe1" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "e088aff2f56359dd61f2faecd491c4e8" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "113ec20c355c73f46faa8b12e9e29c56" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "0ad2ddbe34ca55bfc0f9a29890069f58" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "bfa19fac173678e4e55429f516e276b7" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "8ee5228d6df69ef98f7f495346b2e1ed" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "f3a57e10d57128786ef4767676603b47" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "8ff90d5d2504ba5433c97a92732829f5" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "8b648d681763760dcb5919debe8c4180" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "102114710078e0ea4b3058cc0e973c43" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "6018c3630a6471a1a6e26527d0d87f0c" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "e3e2757c78fbffebf9d258f46802ff4c" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "45248b3b1968925a03d0cec5756722e2" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "c4f9d39af22a83eac84e9aa02893dce1" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "42e87662ab20cc46f4bd19138022f586" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "c61d01b07e91d6ba4c29499a9480ef95" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 52736000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52736000, "byteOffset": 0 } ], "md5sum": "d1ddcaea2b2921ddbe501527c197dd71" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 32290304, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6592000, "byteOffset": 7602176 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14194176 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 14202368 } ], "md5sum": "4c815bd09b7cbf5ad5a3b8e64b813331" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "47159d99b09415620c688a8afab0b9b5" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29586432, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 2260992 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6796288 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 6804480 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27055104 } ], "md5sum": "4133fa70267b578ef22d6974d399f932" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "3587a175fb21ba218df60d30b0521e99" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "68636dd54cda43083b6c43794af5c5a6" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "02c09cd8ef48288cd1a5fa79d982f590" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "9145bc06ad7837f80db22d2b524cb261" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "84c2e735f2e2d5bc28110dd002c5f868" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "a7f6a603afd7b833e2ccd6943f326287" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "f28db7ec829a8f5d2198a69352e0ee7d" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "2961fb6500a094b457f18277c3a64613" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "690b990b3f4f65507c5e1f5e1fd43a66" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "a92ce7113b4ef9c317f852448e09fe01" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "e62be00aa6e44f201de088c99da703be" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "817e3312b327bb1c7cf73d0b5197afff" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "9150adccae6b0ab653517fa5e01773c8" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "72438e9da7577c85dc881bc48e26f499" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "f785c0a4a4b60c7a018ad21ebf2acaeb" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "a991a2c8dbc06684d2938e6f7039bb8e" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "e2b002ff0ba23bac72bd7b0b23b946a4" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "d0b941ec0699a1e4cc2b8946491f0e7a" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "a4b533db749cf6a93c3aca91e8d6846b" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "08d41cdee466430c03b0901cf2b48bec" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "a6c0af8e80d771996dedf758f925d83e" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "7ef3a0cbb4fc7ee9ae3763ac74ade50b" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "eeb7e923ab33565a48d1b6aea87b5842" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "ef56d363b1d029dec84502342a3a8d12" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "1cc2e832ff9291ea4a9cf032582536d5" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "a30df28d0c16102dc945e0b9022933a4" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "3a615cc3ff3f6e0acb288740386251ad" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "765349a31c950853ae00588b666492d2" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "101c19c88ddf22767c1cb11267e377c9" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "222ec0e2359ed8762b7483d72d5a81c2" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "12c6dadc38417751d9c5fc2833b5d84e" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "fd0f3836da9b5edc47c0548a57e5fe52" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "79ccd136a01e4b568fd7017bf76da2b2" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "3a790e2feed025e07b0301a96cc38a28" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "6ab6bfca056c0f21b8d71f9ae944be6c" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "7f17612a178481039cea857468288575" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "0fbe394c539fc9800b49243eaa8246ba" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "53d405f605c1639216a36feb702b9f96" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "04bd3ba6a1ce56ab97ca34c9a627f987" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "8b73eb6ea2f450c66d89ed04a8534f1f" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "34d62832e255d53e84b5a4a980e4e8ba" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "d77f36ee367a1a902cc9527d0b3828a2" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "7f276bec236f75156221a98b9db11f5a" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "9ff8d2ceb6d8389955feb05cd449b0ef" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "603b667ec66f2a9644d184207619c92a" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "ac61eca6d8a6cc76025f8e1458c806a0" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "4c9703083994ac7c1bbd488d59bb631b" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "b8d5f8e101afac557c316992e7e37152" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "5147b6ff0c931662ca86dd4307c4522b" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "4281f8686590e82eb39dfc454bcf3d3c" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "1bdcbe2b4ca9da89b2c162830c402859" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "36486f1ca3e3a3a37fc30fa2e0ff1ec1" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "21702c3c45db82dd049a3e2247705ac4" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "2bf568c0c933d9b80669a733c66ebd0c" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "b899e41018535955a644e531ed8f4a66" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "6b5d29d696224cfaf69d95a4c6ccdb25" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "8177e1e8b413acbb780fd9fdae3f5d9e" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "cdf1d6abef4dd30a88f9f547347008c9" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "0466466c5b67b0c09737fdd06b9da93f" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "287f56d0cb3fd56fa4fbfb3c0c21c9b6" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "3d3cc2f8e97b8edffad08ff664f3c4f1" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "405a5a57eeb28fdef19319e862ca2ff7" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "5ee628ffbc393565bbf9f1fc08df27cd" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "61eb49109e1dde0d595670b2d843631c" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "334e4e90004b4210dbabd2a91d173598" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "2ccdc1b2abb58910ed1dc419ade169a4" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "8ea5f4fd6fc810fadaa0e13a40218d5f" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "d3faff1a5333014dd0d3e1aa710fbaa8" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "889992dde940292ce409220ad5a17535" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 10125312, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 } ], "md5sum": "515dcd71094db14415613f4c78250be8" } ] }