|
{ |
|
"metadata": { |
|
"ParamSize": 485, |
|
"ParamBytes": 11172753408.0, |
|
"BitsPerParam": 4.097919499366067 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9b9e5d249b538a2a23cd9d6019981ce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b75a004c453f62cb53310dd9f39a4e50" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21233664 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24379392 |
|
} |
|
], |
|
"md5sum": "e6520c88c12f921f441acee5398ff07c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f3ebb9920e8b85c57da98df48847ce9d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 284295168, |
|
"records": [ |
|
{ |
|
"name": "model.tok_embeddings.q_weight", |
|
"shape": [ |
|
92544, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 284295168, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ca14145a0c0c4c397f63d05d3efc2835" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35536896, |
|
"records": [ |
|
{ |
|
"name": "model.tok_embeddings.q_scale", |
|
"shape": [ |
|
92544, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35536896, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0ea7afeba332ab1e5b9532773d69a3b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "77dfcb6bdbf516add8ecfff6d44965d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4d57d99f602e5e7564264495c55bf3b6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1692de5d18f15e7c01bb91a26dc0230e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.0.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "98e4b0820b49d5bccf6fe1dbb8be7ce2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "72a985405bb3d53f60b97bf1e2332e2b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cf1fa92d5b1a4100c6549a2c884b4983" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fe5145ac7899a1d1de150d2ebc809cff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2034c0e4cf1503838051c15fed1a11bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.1.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "238d86afda94541e05ffba291879c288" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b038ebb996dcdbcda9e26155429e364f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9f17c0201ce0717438d68866358cd96" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8fbac1aab61a4fc5a7c43923d2318165" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0937017c79435c4e9fe98bb3e03f40a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.2.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "a36f20dc7a6ffe035aa2583d09804c63" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31457280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 12582912 |
|
} |
|
], |
|
"md5sum": "2bba0fd99b16bc9c746f856e35ebdd73" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0ffab72d9b34516e2118eace3282a859" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30683136, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 2359296 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 27525120 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 30670848 |
|
} |
|
], |
|
"md5sum": "721df0faac45e9bed34d03d5722df4fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e06a46bd4ff4616470764256b08ccaa5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e94c72f18607594bf53fca9f8ab6e9ed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a2b1f939810b4c0898f0503237eafac4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25190400, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.10.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.8.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 18898944 |
|
} |
|
], |
|
"md5sum": "c8057dea8cd1821e44c0c9dd91c41d3a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31469568, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 12595200 |
|
} |
|
], |
|
"md5sum": "1466895e1c5cbc65be0449eae1ba0765" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1c148c5952261b2e18fd0a68e1edd1c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30683136, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 2359296 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 27525120 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 30670848 |
|
} |
|
], |
|
"md5sum": "2113efd5882d73352a9dca055ccb605f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b4e775cd3ccdb408b60c073ade853922" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "97fac31ac46ddf3359658e56238b008a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b10c5106b6f876782ff41ebcbb8c55e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fd7a2553ea053177b9c55ac60d7fba99" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.9.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "d7984a5f3fe1753cd27283f9d0159c6b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "02fae08b842e37606de9c803d5488f48" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa42bea3a653e85154609159e5a73fcf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fe075a318e0f8b16a4d2a8368d66413f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "45c07eaec64f198398cffa978d9a83f5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.11.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "162ac15e1b59aeb7d672ca7ec7fa4f42" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be62d70497cdc602815537e2b6187b40" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3a2b112ed21e5a0b5420a8c62d458bca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cd5d417be0799a24a579c9fb626146a7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee35ec38f8c514405574ed5a4f3c0d1d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.12.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "f24ddbaf32a5d7f5b71c7e5cee8d01e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b5aa78602d686a8a798ba8edc5ea12a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e70489449fb9987fb1923e5ae5f20f3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2cbdca22297f8fb2492e184b2498c5ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bf26d2932c39e61d8f16d72d1a1007cb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.13.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.13.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "4c345ead3a6d1626b0fa94d39f760f42" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "05ed145f96a749f1e5485007ba9bdff3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "91b196f1ee5b0c9fc7eab50972cfb94d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "46190f54ec12b2d53e0f4a39ff7606e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "65a10a2088cbe633298f90961d3008c4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.14.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "5422bdbf876020a942b0ae48542e997a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "40d406373edcfe671148f51c58188b3d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f36308450b85047579030599cd4807fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5d54b81a65ed98f8cb420b3ac5c810b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "92128fdd9d2ce3d54b5cf38b9bc28391" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.15.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "e6888c0a747108846cc9f2eb2cb2c98a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "246725ecc77323925205f98ad2ad48f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bf703b923c7bab71f5bf4e4b5f892a76" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3d97de2553397e1bb18768648aa64868" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "76dcedeef122b7058d63692923aef750" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.16.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "47a0c159a9a9a3fb72c2b811faf05d0a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "490c21a440f5e1714261de6a9549760c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "db9a510efeca4751586b1ba77c862e90" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7b040c1c18f4abc6b9a2e61411dd13b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "38dccce8fae7230bdaf6908b657314df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.17.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "14a71172fc59a14a8f2f3c5e9b343fa3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b09d8cac1fa6217954b45c6dd9bf8a5f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7d623a78548db9de6ecb0ba26caf9473" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "830dd81fda54b730aa38974808d42f8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ecdcdee88ef207c0edfff16c4963c792" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.18.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.18.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "af9790c05f42784abfeda3221644f278" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "353670f439163af331a12230bbb91c31" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "68d29e416891959e83934fd381c3d5d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e68baff97678fedcd3690b52f3570b6f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23b1527e1c6ca325159ad712bb0d9392" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.19.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "43e154db72c887328011b222eddc1d55" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "13717ffad5b2cf7c094193bde6a283d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "37274533a84b02617807533fbd266104" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c2f2273008df8d15839af1d97d24dc1d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "31b73de45831f78b149432353e767fd1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.20.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "3de2c2a06fc1c7ffe7167fe61d34680a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86af285350ed289af29807476b3f9b97" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89aecee669e406a982e7ea38e48d5e5b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c7d7401860f343773355a3c115f23f8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7f068722b55e80671ec8d759209c9ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.21.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "00d5d456e1cab93959fa3cd1714fe941" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ebe811f4c8b0122fcdc1060af17df4f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8471703fa9b94b2c3fcf5eba6455dd60" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "76f0d2a2efcae8fa029a95f8ee24a246" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4a6b1934376dc0218dbac30ed21a9d5f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.22.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "f44fe42d3a651dd8833e7bf0e00bf3a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ac1dc6aaca1c4d25bbd6a4af1511ff10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ba7c52d984110124ed7f4baa601ad9ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fbb39581ba624860e4669edfba7247e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0399dc767317b8c3adc8f4ae04f2e113" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.23.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.23.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "b5e0a5bc5eaaf33e740699713867945f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "abecd49f7941e61eea6b8c100cbcdc14" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "64399e2619d7a68fd741391cc395cf20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5321c22526b257abae7ace77691f8f25" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fa386f927b426e09c683e2c0d7a6af69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.24.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "e99c9703a632dfafc70450a02c4ecd9d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f0a979ea49807bbe454f4a697e6a6af0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a0f3fe8acc8c07a35822ed17144eccd7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "22b9e6d91cb37f08c0f50dc0b336a60a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f40f12f4b4550c84248a2efc1b7bbb36" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.25.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "e635be28861255c987c0b2f55e4811ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4de9a0fa239fabd4ea050e6ad13eebdd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "05170ab9db2c8aad45f180ea988722e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d46ebfefe27eb12184a000ec9c408610" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b42151e035e34fd5211c53d960d6b8b2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.26.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "2dca9dd7364013a355cc0cc816eb51ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0a2c75767433aea6df65cbc53bec2167" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b59db67da1499944f0ab0a729bd7154" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f28b8d9086d55030b4308cf42bac76ae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "345efd5241119907c753fbb3db159cf9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.27.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "51f608415b50d03292e428f7346f8858" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4d589f4ebf7397dea54d54df9c60f02f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2e9327a41378044126474c294689a3fc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "593cc919dbc8eb0d75eaa59b0b3ef6ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9bad7a0e49fb4f8fa4147214514b4883" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.28.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.28.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "576d510787bfb1e7b012a0c52da43c4e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b6352dda9da320bd8a71fd182f2fcf92" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e83e004ea05000f072f5086400a0682" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9e2954aadd935b9e64748a49abf7202d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2413d9696a13000692c530d69ceb5f98" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.29.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "6c4381ff3020c4b4bcdd27c3aa20882c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "42f17b9c6aea608c2132b8a5414977e5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dcdbdffde2d4812ce4c25c4ab293fff3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0d441147c8c220d184bc7a29a7c878c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d614ff9d28eab46ca9e2c7019486ef92" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bdd6f89340e69f6715dcaedf8aa83781" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30720000, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.30.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.3.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.3.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 25190400 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 25202688 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 27561984 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 30707712 |
|
} |
|
], |
|
"md5sum": "c8de6ad9681b6e394bc8819849b03756" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7cad850ad5042fc7f5a9fc8cb0874308" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "915fc8f2006351af222d37a9044dde10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "972f85a3f93ae5f84bff1da4fa5e0030" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b73ac7ae2b42939e5eea0eed24c0959e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.4.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "8a319ed0157880eb0e20298ae24a9137" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0c4b8414a4671aaea6b15db3740089c0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e7001ce4fc17f9de8ec82fae94ff06aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7a25b3067ac0d4e6d240b588ccf4d37d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9da3d0628fb4d6bbd4ef79079a329765" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.5.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "055a8997ca2eee22cd74d1b45f01bead" |
|
}, |
|
{ |
|
"dataPath": "params_shard_146.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "24a5a4dbc97c24e14f5813d2d3203bda" |
|
}, |
|
{ |
|
"dataPath": "params_shard_147.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78cf1b841c2d8bca6eed41273d6842e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_148.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b4a25b89401c71edd3e3e9fd1cf8b8f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_149.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "95ef384812d5bcb40783faea1485d38c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_150.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.31.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.32.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.32.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.32.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "68289130bb93e6f4eac3bdf3d284b401" |
|
}, |
|
{ |
|
"dataPath": "params_shard_151.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a2d72dbf335134fa3c889848a0264f43" |
|
}, |
|
{ |
|
"dataPath": "params_shard_152.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d82c9855b12685d6de514a7923332cf2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_153.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5834dae9f685eeb46df9ce91a4ae6f49" |
|
}, |
|
{ |
|
"dataPath": "params_shard_154.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "281aec78040f897d50bba3dc23421143" |
|
}, |
|
{ |
|
"dataPath": "params_shard_155.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.32.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.32.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.33.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.33.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "d11703fb84b835d9e1ee31e75753908b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_156.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d48c63086b925cf7880dbb60170aed98" |
|
}, |
|
{ |
|
"dataPath": "params_shard_157.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0570540df18fc7cfcfe6417c2b976400" |
|
}, |
|
{ |
|
"dataPath": "params_shard_158.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8db337e43f10305f1dc9ca9f12999301" |
|
}, |
|
{ |
|
"dataPath": "params_shard_159.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "adec4a6bd21f3574bbd467bddae41451" |
|
}, |
|
{ |
|
"dataPath": "params_shard_160.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.33.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.33.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.33.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.34.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.34.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.34.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "63bf25fe91c0791e55de7b2e8b19a4cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_161.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "77f3f7ffb311e4b9a9ca02cffd053ec6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_162.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "93d218d704627aa09e6795f93d91250f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_163.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dc41359b12ab75c08ab15da606f1813d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_164.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9e5e83ff86b12484dd4e71651e9c2a2e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_165.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.34.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.34.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.35.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.35.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.35.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "06aebcfd006c39b66d096ec2ad7bd0db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_166.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96da9a8c9573fd0ce9e5fa3319b3ce79" |
|
}, |
|
{ |
|
"dataPath": "params_shard_167.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "18e21374c9266abf6a1893f6b19824e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_168.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4f732c8876e920461a3ac01bf1db04c4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_169.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b6447ad2d5fb20fdbd4f2892b8a03f54" |
|
}, |
|
{ |
|
"dataPath": "params_shard_170.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.35.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.35.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.36.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.36.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.36.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "c06d8df77c349eb5f0e9bfa3248c6e3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_171.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dede3e093af0ac37893fa06367aaf846" |
|
}, |
|
{ |
|
"dataPath": "params_shard_172.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9ac03336871aa4003a8c40b5ad0429e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_173.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3e4004827b55b177a7ba54d3592bc1b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_174.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2481e0b4d019163e0edfe2ae03393001" |
|
}, |
|
{ |
|
"dataPath": "params_shard_175.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.36.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.36.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.37.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.37.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.37.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "0474791c9ced2b4366cdd93234349050" |
|
}, |
|
{ |
|
"dataPath": "params_shard_176.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "645222585421be65fa6708fc85743a42" |
|
}, |
|
{ |
|
"dataPath": "params_shard_177.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "79061c56e9d8f780488219cfb82d3d1f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_178.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "46bda62cb8057c1f52ae35424526dc1b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_179.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9662209a638db26ba256c909b99a34ad" |
|
}, |
|
{ |
|
"dataPath": "params_shard_180.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.37.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.37.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.38.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.38.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "48354f641c85a8d5988634844e168b85" |
|
}, |
|
{ |
|
"dataPath": "params_shard_181.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00b0cb59d8217ae84612648a467ac555" |
|
}, |
|
{ |
|
"dataPath": "params_shard_182.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e5cbe25654de4a7b204a3bb524335f9d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_183.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5478c9f09b9766d3b423114742828a6e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_184.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd2f6f845ee3825755f8e566cc8d2c3d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_185.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.38.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.38.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.38.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.39.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.39.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.39.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "84d4b73a97b64848a6474d89516de110" |
|
}, |
|
{ |
|
"dataPath": "params_shard_186.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08e0d3541b27e6e0aac96c6fbba8826a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_187.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "81ca8b31e6c7e67cf7a94ce64753099b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_188.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c6eb99b73593a242a4a449193041a921" |
|
}, |
|
{ |
|
"dataPath": "params_shard_189.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f2cd3d7160965b9f6561e86558c5373a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_190.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.39.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.39.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.40.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.40.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.40.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "46a45114942ac1536a5bfbbbb258edfe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_191.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8d5c77f4235102f10fac0edffda3709f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_192.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fa4ab53f55f372541841f40b9e2a8cc5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_193.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b95b1f1845e488c54d88d517eab36217" |
|
}, |
|
{ |
|
"dataPath": "params_shard_194.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0e87a108ec599d27216e682a6c819421" |
|
}, |
|
{ |
|
"dataPath": "params_shard_195.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.40.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.40.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.41.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.41.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.41.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "810fbd5a022bc83f49588f0652187512" |
|
}, |
|
{ |
|
"dataPath": "params_shard_196.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7c96e0abf603520439cc06e66713527d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_197.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4cad6d7a1f0f0b6bbdbc63ad2ec9b19d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_198.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a01c4ec3d3566a6ee9b565733bf1d950" |
|
}, |
|
{ |
|
"dataPath": "params_shard_199.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ab36738df336067dd4ca9128c278df1e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_200.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.41.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.41.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.42.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.42.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.42.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "a9ae49484edcb9c4c6f49dc1987a9569" |
|
}, |
|
{ |
|
"dataPath": "params_shard_201.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f15f494aa91d4630037100b7cc223816" |
|
}, |
|
{ |
|
"dataPath": "params_shard_202.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bec4b8d962fd790d33b672bf28ade8f8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_203.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50fcebef2f22b3a70553d627748d7bde" |
|
}, |
|
{ |
|
"dataPath": "params_shard_204.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc2cc08f2dafd46150c5e6fe11f557eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_205.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.42.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.42.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.43.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.43.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "44c8668d170eb42a9bdc808fa40e4e3f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_206.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bfb97d805c9de9cd897d1482fb7ac431" |
|
}, |
|
{ |
|
"dataPath": "params_shard_207.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "962e406c298e9a03b4b816bc98b6423f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_208.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fb5bae95e382db294d05491b63244046" |
|
}, |
|
{ |
|
"dataPath": "params_shard_209.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "58b8076966f4cdd9833ea641a0aba4a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_210.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.43.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.43.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.43.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.44.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.44.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.44.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "8cbf2ae05b0257047c056673f64aab40" |
|
}, |
|
{ |
|
"dataPath": "params_shard_211.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78c64985dd1e09e3d053e3da668a8f82" |
|
}, |
|
{ |
|
"dataPath": "params_shard_212.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ad65acda5fbb096cbce23821100d0c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_213.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dfab68d1cbb76d655583d24c6b809c6c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_214.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f3125368c8e829120b04168d9446726" |
|
}, |
|
{ |
|
"dataPath": "params_shard_215.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.44.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.44.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.45.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.45.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.45.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "e93d197cf530def9dc0ccd25dee07f71" |
|
}, |
|
{ |
|
"dataPath": "params_shard_216.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b9e842bf09a26e1031090aee3f33e78" |
|
}, |
|
{ |
|
"dataPath": "params_shard_217.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89ce957b587abf7bcca0f5f54b7092ba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_218.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9429ed983670223155944d2a4a865031" |
|
}, |
|
{ |
|
"dataPath": "params_shard_219.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d632c55535cdce5571ae9692a5e8f7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_220.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.45.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.45.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.46.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.46.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.46.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "be8fe44dfff8ad5da154c060124b5a27" |
|
}, |
|
{ |
|
"dataPath": "params_shard_221.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e73a84dba7df3edf3212aca4d43b892d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_222.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7a0791319f2cecc1731130321ccb01cb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_223.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5c3e84b69f0c7a3a63157560fa361b54" |
|
}, |
|
{ |
|
"dataPath": "params_shard_224.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "67efaa8dc17af7cbe4a10922605975d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_225.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.46.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.46.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.47.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.47.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.47.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "ff36f2a1a4a79fd1edf4d84753c06fe1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_226.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7b242ef01f8bc94265b14c88f4fb5a85" |
|
}, |
|
{ |
|
"dataPath": "params_shard_227.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4096c8406ed4e677033af46ad337d0df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_228.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ed49904cb5632a1ebad2f2247c5d4fc8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_229.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fc7e1357057d708152c38af23e367a85" |
|
}, |
|
{ |
|
"dataPath": "params_shard_230.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.47.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.47.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "40c0dc0ccabfe5f1d9f78b726a360031" |
|
}, |
|
{ |
|
"dataPath": "params_shard_231.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc7f3e7e05fdb6a0c73600ef94990adc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_232.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fde36954854070812585cb2c59d503ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_233.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "18a104c8923e0d56d349f11f43793f0f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_234.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7864bfac4b3dc9bffd17fced37cf9be1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_235.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.6.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "fb98f5558e6c59c07776b7b225f736c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_236.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6facd90a35565420697823941fb45f41" |
|
}, |
|
{ |
|
"dataPath": "params_shard_237.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3ac01a84379a87490d93c3556016fc7c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_238.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fde46571de8b1829bf5efac03918bd9c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_239.bin", |
|
"format": "raw-shard", |
|
"nbytes": 284295168, |
|
"records": [ |
|
{ |
|
"name": "output.q_weight", |
|
"shape": [ |
|
92544, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 284295168, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "55f61b9267d7e0afbe4a1198dbd3d3b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_240.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35536896, |
|
"records": [ |
|
{ |
|
"name": "output.q_scale", |
|
"shape": [ |
|
92544, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 35536896, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89f25a89c637723b976edcd9b8abe8bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_241.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.7.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "4be47898516de3750155af2c09274973" |
|
} |
|
] |
|
} |