onceuponai's picture
Upload 64 files
8aa5ae6 verified
{
"metadata": {
"ParamSize": 505,
"ParamBytes": 6981894144.0,
"BitsPerParam": 5.001000089116852
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65798144,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65798144,
"byteOffset": 0
}
],
"md5sum": "ab58c9a64811f2b16dd01e5693b3e2d9"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 65798144,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32128,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65798144,
"byteOffset": 0
}
],
"md5sum": "ed992c43568bca295339e23b4698b1e1"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1981bd9a7a9fbe36d9861ec94327edc4"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d269d2364018474e10f441fe4e0d8ba9"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 27475968,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32128,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8224768,
"byteOffset": 0
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32128,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8224768,
"byteOffset": 8224768
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16449536
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 16457728
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 20127744
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27467776
}
],
"md5sum": "3f88e5ab068831023dab0278aea5e188"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "fe6a33f2e0b2c986709bc455c3641db8"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a2ec133ffce2b459b61c8fbbe46cf017"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "8108cbea87a2d17a14308a2494176229"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1b5177ae555c3348c9545b1d21277d4a"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "1d42f72697397bbe9323030a7304f22d"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "5bb79dbdb8cc8394aeb76fe3274c6e2c"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "13c36f3e098f528a9e26972a808435c1"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6addf1a20a2fd0434426507986b7581e"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f15cc86736534d85f4838263942dd33e"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "8db1dcc0d2f836a7bad1b85114b1825d"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "861d42fd80780854aab821ff5651f559"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e4ffb1c3e8934addf7567a9752d94809"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "d1e814434a81a2c182e933d5a74382be"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3120bc553d4b6c5cdf57c4b01de3aa15"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "331bc6fbf9e6ba7b4c2cb4bba534e9af"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "5c8a21d21ea499fd84be50125dfc2c6f"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b5746f66046650b1299205debcf5dead"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "12972b8e16dda7ddecc9f502a68b9771"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e1ed69a25639bcf949da6c782d96f2d2"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "ce5d510326b7e2db160d72a977be3f90"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d79c0b117daf6181d326eddca2f112cf"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "42a6ac09dbcb3b77f9f061db958f59b8"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "8b37a4fcbab7a0c0a3fb04776a2d013c"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "72b2bc8544d566a5b4e7aacaacfddca4"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "385123ab22306a5b1fa5f2acc6083df3"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "f4bb8dae6352fd997770809b35afd0a6"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4aa7335ad312453f63a0045764272f31"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d725b00bc5b3d1e8eecc2b1e16426bdd"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "76c6a31849ae99f2d99492717f4e9db2"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c8102c8f677544d6553875d501fb25d5"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "e0ecc06df7a5094ee438215df1eafa9c"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "71b04f1385c28c209eccfb0408a1ee08"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "dfc03e3599d5bf59963e32e3f4f593a3"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0278cd288cbf8e81a0173495de2e54cc"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "664eeb487969eb6ee1b43ff7dfc7294d"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "982d5c9f1f7b467dc20938bd9694e7a1"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "bc496ce03ecee23943a0e4a0187abe11"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7d1a24f883a9f631d843b017a5711eb4"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "c5ce597236a5dd01a6360c04e8c1d064"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "81cac5a8b8b7b7ae02a1ea7a8b4c535a"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "eafee0fb7e50605f972cf8dbc6d9c2b9"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "997848f2820d1936ac0438517e4b350c"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "18661f5bdd4bf36aaaf2c65dfc873e9b"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "d7cae1ac2821f3c9a65d6a6787dd89d6"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6f343200029f916ff2c25d7e6e9a6831"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "9e5521a80e531f43ee6a599fbf76c966"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "c64d6b6fd537d2faad2fa3a1122bbe4f"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fdc3f797c9538e7f008457e118dd0d7c"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "130c5ea339f854f5c09773a91f0d443c"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3d8b3babee6fd31c986d43ae7148f8d6"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d41536f81917cdcb5d5999da245e663b"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "76d5f94a3487b65e92e7e8608fe6abab"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a15c477c230db3002b88e10ea6d6baff"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b4988fd70dbdd109d714e192c7bc53d4"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "1951d125025076f8288a7630ccf23b1a"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3430414581bd365981645910d220d7f0"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "df1ce5fe34a990fdfefcdfa95d69eebd"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e069f40e53cd7f5adc11723e0774e95a"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "7d8ccfa8bae8d002ec0aa4f67a09fc3d"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "abeb38b2a9847f45d93a28a17ce7a8d5"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d7fbb43199503c7b4699effec8b8748b"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "8c2f310b817d10c1626852ea644e2db4"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b4e7643bdbec7469a4cc2846ea2e8456"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "01cc514e7ee4a5acd86dd57fa2f81ff3"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "870bfba6f9aa3e8f7b4e858dcc615c6d"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "b36ea0e3262979da8bd8a1b14f2f6ed4"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "18b5e4e89ae7b91e67b77d6e36635480"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "dd25dff6275354a2b46b60b7a1ae33c5"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "94d4ba93d92046ea3d0fa1632997dc82"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "a1596dc19002d59af33a6e30d5d1d5c8"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "948ffa1b676f3ea1ed13a2f56f373114"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "ab900066e1aca20931b15ccee766da97"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4bd399e395a69674e84c0c4302b48325"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3d234c54d77662c1688441c1903dafcf"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "6835388f83a7b45562500c1894550f83"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "263958641fe2d1b97fe2daca51fbed5f"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "71605ae09cae2def283fb157b33a91d1"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "88ea1313e05e3161c89924a84f14e512"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "35ed10a2cd085024bf058a28d2391156"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "77afbc9ec48f462b1243ab91b2d37645"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "3ea1836f037541a8bbaab3f8b738faa4"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6271b1d6f17c64eafd4e75fad35125d0"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "3b30a815e20bbe37807549f22e54143e"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f0a2cf94d8356180fb08c0ba387fe820"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "355080509abcec398751b6145e6917f7"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0c0a122aa44f96c325662ce65c65acd8"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ce7cd418483b2712d20e6dae4290e596"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "794aeba70b08fb8e08a4f37fec724bcb"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "42a90e05472253c46c36f8b4f7d5d009"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "83b58899b57882fd77543fd9246c3a85"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "9bd85bb1afde60af08985f016bbc9e34"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "53fe83bbf35c16d0696a0890f117ff3d"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "632dc2b907b69ea1ec2cbaf44fb5c3b7"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "e5394433d2163c76993faade0425348f"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ad4354e78dfa05bf61671bdd21f14df7"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "0ae3e07d3c3cb9e4eef0221a90ca6dac"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c047fe87603c1da25441d6d69d3ecde2"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "1fededd928003119156c2aee5c8400f2"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "23cc58ad6b1f1832fb968e2b1064beb0"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b4ed7268c09275e098002f5915d1cf50"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "49034d1b760a3e2fb9bc3f3fbc72bcf0"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "38b48fb730249736da885c9a4ca23359"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b98f0ac211a91a2d995c78fd8d8b2281"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "b8635d9b726d9afb13054da9fc10e1fe"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ea37a6d78b264140bf83fa0dac58ccfb"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6d6eb29911ea92ec6c0834819781e805"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "5d5cc6bf2042e9076145e61dab7fc78e"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e89873765932f148b3fdfbf3268689e3"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "745dd4c8fca3dcfabac2a4f328495446"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2d228d4f8762712a1f1a2019158c2ae8"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "30ae5999c80b960cdb1c9dcd65ac43d3"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "f35ffd4aba4396620eb0959ed8c16442"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "63c55b28e72aaecab2adc23d92598581"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.40.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "9f2850c3ea4619d48e2e2dcfcf7a9e6c"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "cc2a97139bee936f158f626a7850d440"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ec378de2953af9450bb4229230263602"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.40.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "709e216fbf09283bb7eb9dc379dc27de"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4c03726a001f62b15792cdd471733ea0"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ad9ef59b3ede831960eb1f5e20a5032b"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.41.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.41.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "32803da1d76d6c182b39645cb3636a0f"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "7d4fc73c2d54e04961c1494f26a499f7"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.42.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.42.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "58437264fd89809e367778bb0a84953b"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0bee97fc389625e77f7c73871fabe2fb"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.43.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.43.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "c6261aac21df150da57c296456e11b56"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d9efbe95d4abeaf973a99eca178b694c"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d3f6358ed09a87b6a525cb044ac5f25f"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.44.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "b5c12c67751dfda3069dfe6e22ec9555"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "43ec33cd3eca8397ca6be399c8e5c939"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a9e291bbe57f4c3bdf91693c484afe45"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.44.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "1cbd798330e317d96e3fd378abf74d41"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "03f3b00a0d5b5d90a9ebc2c21150a2b9"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "91f9ca00af3c9fae7f01fd9643b6d142"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.45.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.45.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "17b0f60b8ffe634134f366a41708cf1c"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "baee814ea8b30a4f5938a418cff1a66d"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.46.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.46.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "d7a955908e14752c155886a1811f7891"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9f5f6fbaa6db664950d56366a4ad79ad"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.47.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.47.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "aadd7bc611473a7418a37d03e5c6a05f"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.48.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ac8c26a57e8216deb138cc723a41a490"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.48.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e2172a6f4f65f11f169882b6f2df1cf4"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.48.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.48.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.48.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.48.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.48.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "4d85d61f8993472a896f1318fb484963"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.49.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "8a51f516db23931ed7dfa24b98efc6ba"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.49.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6ee2d6f10bc5c423011e82e280862c20"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.48.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.48.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.48.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.49.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.49.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.49.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.49.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "ced1f4c1a9a1a33a545fe3f0da717893"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "72a11b1757b28c57414f5ce08b2a0ac3"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9deac6270c092393705d7d467e0274e1"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.49.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.49.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.49.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.49.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "d046558969e6783601ea258ed9853be2"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6e1c3cf04ce5f948c9315b89111c82ab"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "57d795734006510f02c237264bb219f4"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7d8050a4ee5ef68b86447822be7a8e0b"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 3670016
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 11018240
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 23601152
}
],
"md5sum": "d9170c6e885825c0ce140023ccd11e4d"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5794686fb2c51b0d76cad0a791f2a89a"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "01b0fd2b7ff656aa78a317018f5c5437"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 33046528,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 9445376
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 13115392
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20455424
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 20463616
}
],
"md5sum": "1340e4cecbfba3f49fba3f198f4dbb87"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3a4115a73849b8a631407ca44013598a"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b286149d3770501af72dcf0ddddfb96a"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 1572864
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 9961472
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11010048
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 11018240
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 14688256
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22028288
}
],
"md5sum": "aa916d3d3c8f78fbf6d9c1d9c67df4b7"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ee3a37e7718102def70e31181bb07d40"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "02f63966cdb32cb81de20713215830f6"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 27271168,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 14155776
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 22544384
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23592960
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
448
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 3670016,
"byteOffset": 23601152
}
],
"md5sum": "432c393d43b11d0883b2b431a6d740c0"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 30949376,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 7340032,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7340032
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 7348224
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19931136
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21504000
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29892608
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30941184
}
],
"md5sum": "f9149d20e1e48ffc3b44f6afe09fe50c"
}
]
}