dusty-nv's picture
Upload folder using huggingface_hub
3daa4c0 verified
raw
history blame
99.8 kB
{
"metadata": {
"ParamSize": 243,
"ParamBytes": 863408128.0,
"BitsPerParam": 4.036087612624202
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
49152,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "6e0db1a93d993cffb7aaf8a9c1d4ce7c"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 31502336,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
49152,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 6291456
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 6295552
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 14684160
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 14688256
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 31465472
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31498240
}
],
"md5sum": "99e7ce954ccffec1675e26c93216c2fd"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "b78a73072062b183c83026bb8404ced5"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 25247744,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 6291456
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 6303744
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8400896
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8404992
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8409088
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16797696
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 16801792
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16834560
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16838656
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23130112
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23142400
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25239552
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25243648
}
],
"md5sum": "61a3da5bc7b8cb881acf49a479c2beb0"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 31510528,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8392704
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 25169920
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25202688
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25206784
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31498240
}
],
"md5sum": "30c3b73941a80055640b4303272babc1"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 27312128,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2097152
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2101248
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 2105344
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 10493952
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 10498048
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 27275264
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27308032
}
],
"md5sum": "c611617a404e265cf5014e20c93edbe4"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "378c98e7fc217fcd6537270e0f8207e8"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25247744,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 6291456
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 6303744
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8400896
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8404992
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8409088
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16797696
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 16801792
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16834560
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16838656
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23130112
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23142400
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25239552
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25243648
}
],
"md5sum": "4ba882f42034c68bab24c8166a445a6d"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 31510528,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8392704
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 25169920
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25202688
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25206784
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31498240
}
],
"md5sum": "5f2a267c4e9ede33dbe13802de268f6f"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 27312128,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2097152
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2101248
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 2105344
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 10493952
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 10498048
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 27275264
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27308032
}
],
"md5sum": "f86aa9386d753591e3f67c6a33d68c0f"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "bffe95b456369085cab1ee348d6a670d"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25247744,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 6291456
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 6303744
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8400896
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8404992
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8409088
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16797696
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 16801792
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16834560
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16838656
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23130112
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23142400
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25239552
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25243648
}
],
"md5sum": "eed78fbf13c52b5cd02f9cd72743120a"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 31510528,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8392704
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 25169920
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25202688
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25206784
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31498240
}
],
"md5sum": "8ab60047c335cc14acd349b8483df474"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 27312128,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2097152
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2101248
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 2105344
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 10493952
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 10498048
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 27275264
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27308032
}
],
"md5sum": "65ad69e45d0c1efcac8cff77764a1aae"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "3ed61947a44f9b92ae24a3fcb89ba516"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25247744,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 6291456
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 6303744
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8400896
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8404992
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8409088
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16797696
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 16801792
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16834560
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16838656
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23130112
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23142400
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25239552
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25243648
}
],
"md5sum": "8c747b95f1612f48e2636cc78849c66a"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 31510528,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8392704
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 25169920
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25202688
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25206784
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31498240
}
],
"md5sum": "a8c56a6776ed56399e6834a9b288a48c"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 27312128,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2097152
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2101248
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 2105344
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 10493952
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 10498048
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 27275264
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27308032
}
],
"md5sum": "70ef616fcc72fd9aaf7b66a1a37a590f"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "9c6d8615e8e3110b2dc7dfccac7c9ebe"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 25247744,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 6291456
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 6303744
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8400896
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8404992
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8409088
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16797696
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 16801792
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16834560
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16838656
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23130112
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23142400
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25239552
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25243648
}
],
"md5sum": "063802cd3c3314db8ba92c47859389bf"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 31510528,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8392704
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 25169920
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25202688
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25206784
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31498240
}
],
"md5sum": "8a9c92d10dc52ed63f0537e0f0686bb9"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 27312128,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2097152
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2101248
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 2105344
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 10493952
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 10498048
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 27275264
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27308032
}
],
"md5sum": "72f7469ab1aafb6491fe2d0b5e4d9fca"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "107f0909d8d889f4885bd9c7cd3289c0"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 25247744,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 6291456
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 6303744
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8400896
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8404992
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8409088
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16797696
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 16801792
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16834560
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16838656
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23130112
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23142400
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25239552
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25243648
}
],
"md5sum": "19629507324f20913f67ba7619cfb59c"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 31510528,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8392704
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 25169920
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25202688
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25206784
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31498240
}
],
"md5sum": "c133bf74a986aeb9c4ee1df0bc8e3df1"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 27312128,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2097152
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2101248
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 2105344
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 10493952
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 10498048
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 27275264
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27308032
}
],
"md5sum": "e98ae57ab3ee099b45efeaaa7f9f063f"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "ba7d70e9ab37fa9d5121dbc45c3cf5a5"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 25247744,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 6291456
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 6303744
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8400896
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8404992
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8409088
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16797696
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 16801792
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16834560
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16838656
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23130112
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23142400
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25239552
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25243648
}
],
"md5sum": "405bfe03c21c60ce2f5cd357d6750d5f"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 31510528,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8392704
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 25169920
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25202688
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25206784
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31498240
}
],
"md5sum": "2f50210d633e38445f2aa2b8ea3ec335"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 27312128,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2097152
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2101248
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 2105344
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 10493952
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 10498048
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 27275264
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27308032
}
],
"md5sum": "b9ce099aeedcec1b447b220fc6ce5f0e"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "51739ca23ef2c9c8f2306fdfda84620d"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 25247744,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 6291456
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 6303744
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8400896
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8404992
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 8409088
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16797696
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 16801792
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 16834560
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 16838656
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23130112
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23142400
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25239552
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25243648
}
],
"md5sum": "bc80efda63f78fe5d3c1cba9c0d4cb72"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 31510528,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388608
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
2048,
8192
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8392704
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
1,
16384
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32768,
"byteOffset": 25169920
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25202688
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
2048,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25206784
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31498240
}
],
"md5sum": "5e9668e4b64d4804074f24de535e7cd1"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 2105344,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2048,
1024
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2097152
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 2101248
}
],
"md5sum": "b7c034b012032ddf236bcb697e86efe6"
}
]
}