CodeLlama-13b-hf-q4f32_1-MLC / ndarray-cache-b16.json
CharlieFRuan's picture
Initial commit
37d21fc verified
raw
history blame
190 kB
{
"metadata": {
"ParamSize": 405,
"ParamBytes": 8136417280.0,
"BitsPerParam": 5.000860280867739
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 81960960,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32016,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 81960960,
"byteOffset": 0
}
],
"md5sum": "40bb94b868d5c5d1178782370217fc2a"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3a50633aa29e3574e346057a8d247fff"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a5057587038042a69b99533dc190867c"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "701fd58bfb1bbbfdcc0713b2ca1f016e"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "f4f58d6a2bab6a86b20012fe6c10e834"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 32896000,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32016,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10245120,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 10245120
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 10255360
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14679040
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14689280
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14699520
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19123200
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 27970560
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 27980800
}
],
"md5sum": "fb8fed55b32956fb0961e765a51f7d66"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c097987bc73eb1b20d061b9ab925ff89"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "9ff5ba83e446bd25ad5a64675a3c388f"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "ef57ee584e72f211fe7fc179ff7d3e93"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "6dcaf4617cb613eb139f19b26be99b6b"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9ae0129da1d975ede149da8816c37063"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "9236d1f1cda6adcc8a239a4ad1446218"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "aa1cb50a672a2e2cdcc9fa5c873a960b"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "fd3044cb5590f13dc720260c7f3ad145"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "49ff5b75617ba4290f6d5522bd9e9843"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4676ba38530e292c55813879c5735bbc"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "70fae7bbd7c8eac6ddb032a9b0a73e8e"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "2946de0639239bd7e01c9a18d51decaa"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "43b21bf0cca926a4f636f3b60fa07b77"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3673cf37d1a47e6827e31c9ef1549ba7"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "2974696a3a852cd5679806f98b99610e"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "48785d75eb706e6e727d1a5a5fb6c079"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "140bf34c77f04cb3fd241d255cd39df0"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "d05f968a20b6c5c69d7c9ed34f070424"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "31827ffec05585d2c50365ccb3cb6e07"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "24351968087a9f2e925ffcd7c276fa24"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c46674ce8941f201965f491b71f75221"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5086f85ac33d8f9ea04da92cbb71c378"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "ff18512cb689421803efb74fcdb45c88"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "9062355d19a2068fe3ec61dfa2a46fb8"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c3b9f5f93954f7c06d54cce989520327"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4f93d31b35e4c667cbd189b0e5e0809f"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "10c1f4c4f3a90d99a0b7f7340afb42f5"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "f4ad1254f9fb6829d6fc3702678b62e2"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9031c79b513fb4cb538b9f77c00f97df"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "7ad3a2cf23f2c1e6eac83acb507afcb5"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "0316f56fba1199017d9f77392cfc41e1"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 14755840
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 19179520
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 28037120
}
],
"md5sum": "6bd26e10057a4cdf04717f81f892ab63"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 81960960,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32016,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 81960960,
"byteOffset": 0
}
],
"md5sum": "6d50578ed15d045b9e9337ce19dc9e13"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "88ad046b06caf1f099410da09e8c08ae"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "aad31fe9daebc1a4333520dfeeac5e16"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 29434880,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 14745600
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32016,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10245120,
"byteOffset": 14755840
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 25000960
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 25011200
}
],
"md5sum": "a42c4c5722cceb56ca3843f23030d8d7"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "a9ad960dbf9898a0110e5209d7f851a6"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d38e117d1bc54770b58a357150612cb0"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "627bdaf4e05bc259572cffb97c566bb3"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "c11b49c5523eec5f0362136acf70b693"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "464c47fed5e408d220b180840688e047"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a07015e2d274f11f7580ba57d61f7f58"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2157878fbf0371f9aa46fa719f3c2e44"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "f62cf06699e721f8cb947297e6d75038"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "4c166d3cd944ce212d82411cab33a3db"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "087cffa8993baed646f70835ed01d801"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "45240ef0c94bc57f26718d44aa71126b"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "ee90bd70c1631e2d29868f03953722fb"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "b84556f79a653cbdd600a90e5423fb10"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "e30f53f61db49c0d1a440c0d41da7993"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c53c8c09cc766a9d7cde5a247cc08e3c"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "79dd0ca11094669a17b98d96c46fadce"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "601cd4f39c7d319e721cda23723e2620"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "03fbff68c95e7196144807e2966e508f"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "752f688683e96ed41649f9c51ec72b61"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "d019c1053289fd9469c5280e22b42f6a"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "bac5ae6155936c667cbb8de62dbcdcc7"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "db610679cfc9eb8e97338aa7b2eab73a"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b5fb2ae853db070480d09a76c3b4940d"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "14d0821fb287e7c0c7062d3bbc2dd9df"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "334c5696712049dd939664f60d7be996"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "6887659e58a51fd92d2f2d6710720596"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a18424ab79802d42849ead11f38b3f6a"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33443840,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 28518400
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 33433600
}
],
"md5sum": "74d93b21554057fd3e7bd1b7992c4b28"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "33a18d549487ab491d51cd2bda311d90"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e0d330ff6cb42009b27750d18ffd3c1a"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "082f67912ffabfc40a537058a7962dd7"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "0619bc62d8cef0393596e8bd0d744683"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "57564e6b80d99e3374019ffc1eab9fc9"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "0f9e434aac95c0dc4ed772e0d2ce63e1"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "5399a853abd238072e13f01176f1d0ea"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "0a43ad732c4d5af7f550a5635bc3ede4"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "38e9385d0a27ef2f521c16234cd452bc"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "d3a704c16efe9e3778b3d877532789ac"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0af6740c6874dcd6110207a0383ab589"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "5263d086b262c7359f769d5b3466a8f1"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "690407769abc563de7f4f6cac28b99a4"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "eafa67f8b00506b3c7191c4bf1d0dbea"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8cda6e0f8b962d7265279096b513f676"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "be53125e71a33c77785c055b1b33c4c6"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4b5bc8bf706bbdf39078da737a71f65f"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "da5915ea6266b55dce05190532e75d7c"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "48a330901b9566fddfe8aca05e00b84f"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "6651c1f273e728fcc6e21d6b9f0f9f9d"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2b0501edbb8b7bcc34e48e50be013697"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "c41dd59216d48bf28324717d4ed18e4d"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f4e5c660a6ea09bdd3ad7e1c3c0fd7cf"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "a97789b9d1eb9910b26c66da96e52345"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "336864eadc4727d89d69293f54a1bd43"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "7103c835067d805630c9592e0e6afed4"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "36f30b74245e69f231a1c778e5bc82ec"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "d280ee37f355eb99aa1060f8e80c0364"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ba5732c2696fedb6180e6a50c1c7a719"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "666524f24f291e44615316e3ca0077c9"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c02092a6bd46e3e6584a1d7e2768ba1a"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 13281280
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 18196480
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 31303680
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 32942080
}
],
"md5sum": "284149228851a73c9d25af5e5c5fafa8"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5a67e91cd564dfed3c3f739f429d36d7"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "be253bf57ad1829429f22f82d9f7b90b"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c6ab7db40620abed1b0196f1af9101a2"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 32460800,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 4423680
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 13271040
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13281280
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26388480
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28037120
}
],
"md5sum": "ca16bc464857ff3e3639780f7b94a437"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "46afeaa52c94c849dacc18a1c32b0234"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "30455559f125674223798144795a3f05"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c89b0d442cabef6d970c6a34da4d3bd4"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "0a2edb147e51488636581ab848428dad"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "47a2489dbb149ca4c2d3e37abe87d858"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8cd44f849a47b67011a8db80c792baa0"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "20ff3539354f912a08b62c7d09087737"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "856fb77ffc906ee591962d6b3c8b2d26"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e7c0d37667791047a0e1162d5d7abb98"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "1014209eaf8dae72d64db90cd230c639"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "20325c1d981fd3fe0b3231761bb2553e"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "f323226e3442e5bc6d457c935d3a79fe"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "424977f5a8781dd4e5e5b5efbebf1193"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "033e3ae12a200c5b7f5a452b1a3c16bd"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "1cd454cb4b01ff79d86e137686ca888b"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "b4d7f3cee6a3e5406377fbc1a96b2209"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "c060478f84feb857bdab9bda1806081d"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "86fc2834eb763f0a6fdb854f251b59b7"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "66824c6c90e0fea23f2607613bd1383d"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "d8618d51db4150b407f371462c3feb84"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "5a076b796d0903c85f92898d56bcbd7d"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "04d08e2bfcb13ea2107d6ea76276b71c"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "bcf1b18ccfdf5c6e66c42d84e2ccc40a"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "1fcd6509ba64190fadba3681ed4d30a9"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "62fd91aaba876c4b17f5e6069fa017bd"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "af49842244e3d2be80b9f5a7f3dc043b"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "18eba13dc533bedbc858cb55bb61d9f7"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "64e375896702b3e06a70417b7fddd5fc"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "5d0e4f0e90ea81f4d1a9040918d998c3"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "ab4ea63e346ee3b78d509e6168778716"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ca75a163e624d0df0a34185e073f632d"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "0b9f531a42e72624eacc7c54c135773b"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "95f71cf7c8f3ac38eb909e1542a4df35"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a19f5d434b13a803d64e702cdc4554e3"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3f5c558d3910462c8b4829faf8e46a08"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "4566fbd33eceb6f2d9400c8dcf3908ba"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "147dd03595f6aae7d3a8209ddb968870"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0d99f153e89b23d4c08efbf4e2dac5f3"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "54d4144a95111e1be5ed71106a8d739e"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "9590cb6e326bc1cd55ef8619d56dc004"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "f35bcfd078601351aae3118424aaa010"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2fd5274bfec2e65b20dc8cde9019ac9f"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "fbe4530b60c0c3bd42436840933754a9"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "cba14a78ec59e2081be29d14dc7eca1f"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "c5d4a4123a65c63f988fb95981b04da4"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "0946a846b05f95cb14d8266f1119778d"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "764c91a15eb8277216e8fe76364138a4"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "f55f2291debdca2bac369c64133b4880"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "a73151c55f66df7e8c87b9343cae6db5"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "81193c1d76e7638ff5a9929702a86b02"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "1f0c20a79f0d6c53c502aff20a24c7a7"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 32952320,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 28518400
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4423680,
"byteOffset": 28528640
}
],
"md5sum": "895149fcba73736876c377d38c197257"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "f5a26522addda67c92282e9fc7ace522"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "05e9d3e6c73fbe2b9137188b867bac92"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 28518400,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 10240,
"byteOffset": 8847360
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8857600
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13772800
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26880000
}
],
"md5sum": "8961854d44d8c0790d463c9827b9c85e"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 39321600,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
15360,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 39321600,
"byteOffset": 0
}
],
"md5sum": "e5fbc06bc743100012e6e3664964deba"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 28508160,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 8847360,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
15360,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4915200,
"byteOffset": 8847360
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 13762560
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 1638400,
"byteOffset": 26869760
}
],
"md5sum": "4bdf82c8ad60b34a6f1e99d38ed78cc9"
}
]
}