|
{ |
|
"metadata": { |
|
"ParamSize": 325, |
|
"ParamBytes": 4836966400.0, |
|
"BitsPerParam": 5.000929019225669 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e1a971bb732b40a670ab52c5ad3f6c64" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f995329de777fafcae8dbd53da8c5b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.0.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "edb4ee2153234557237d3a5341e9f64b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b1a080619ddef7619609b9599ac73074" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "d0d9a878be4936372649a092013fc530" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "41f6a24a868189a8e0016c982c728ebb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.1.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "5659d69e9a16e663f817664d1de01534" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3ab6ef410e14875685563d8d8875de75" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a753cef1ad4fd880a484002919bd616c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.10.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.10.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.10.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
} |
|
], |
|
"md5sum": "6852d2c58f5477db8d818b943a3019c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f795e5006b230f8abbf1614b82f85b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "76c7558c73ad8c1f6e079c3106925c10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.11.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.11.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.11.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "3b4efde4ce6941213fd60c4720b9ac0e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0432a7d6b295b18e1ab3ed658a421043" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5b7f0536b19495bef74c81c320f0ed44" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.12.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "345371c33d44aa4e879a689187c0e8c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2accf15e8563de4728a0ccdc921e2743" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "0076e7ddf4a871de87de63ff1ab4954f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "90ef7766e79ff85f7c457d129a80e775" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.13.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "1267685de167ee3d89ae81caef35c5dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eb2e307351ddd3e108073fe25d8baab0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cb82922ccae1e86901e48ec4bc7bba4c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.14.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.14.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.14.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
} |
|
], |
|
"md5sum": "97a022310ce21bdc2574886422f230f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "161dc3ccacfb29874d7ab92d4ab8505e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "de02ecb2c3616bc79dcc91b9a65fba0d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.15.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.15.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.15.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "e31898c64ad2ec75c0b908a3c8ee4b91" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "30f895ebd507e5baff590731706f8f53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69790aa74fa90dd0f692ced93ec7eced" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.16.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "24bc3acaf8c900b427667fec8cbccfcd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e20303521c6e3f182fac63f68357ee9e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "30e4f6ecfe9af0015a45c3168ed5621f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7c9af3b1501515cd7a3d9c5ae25b3f8e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.17.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "b10222b9f60012deb795a57ceec4ed9b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43004c7f6f63693e32288e1a73f2a933" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b228959a9f55c8b719acfcb3c693bb6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.18.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.18.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.18.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
} |
|
], |
|
"md5sum": "8c2f1b1783e22c3ac7c03f5d31b7ce5c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "98e9f224cfcb62268c721c359d5f9407" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "154d0af6ceb7846d9c8cd0a18c02a9c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.19.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.19.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.19.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "59518c9f86826238d4be2be0148225fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ab6c0db600b801f9d677e1c82ab572db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a3b527c602c3f4884dff90ea0b324abd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.2.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "320535ae20f6f105c309b9f1d19dbcdd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d8a5ad7cea776ebd74de73845cd4f4b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "205573230975ac7fdc936bac20b01ec5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "94368cc95206e726a905af5ff1d6fea2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.20.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "fc2317ded1ce4a5c51d4545639d2613b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7544901df0331e6fcc841b15058184c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef516b59b7589fe7e6f2aa156fcc98d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.3.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "b9f903eafff40813c2ab653b7650eea7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee712dd298b03e05929c68775c652abe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "f4be7d45c731699aba26844610cd9791" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7b6013dbc2716c48400bc9907b196192" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.4.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "7a4966022d62d72ce68da42556a84f0a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9a32f58d4c7054fcf22820703269ee1c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8251ae3163e5da75d66056a12fc5580f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.5.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.5.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.5.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
} |
|
], |
|
"md5sum": "7fd840b8e17b76e9402e1bf827854734" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5364a89a0cc523b6db955f19f390e1cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48d2e8cca480752ea9a400409e46327a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.6.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.6.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.6.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "810790ac7355c1ac008bd5316319b467" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ab570f4b971225ce4497f13c88932d6a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0fdfc8aaf9d95a22d40d4cf1af1e6fed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.7.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "25c3af7d85f256d05d403600d10b3f14" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2cbef4bff080523c891c9e2047418249" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "065034ed1ccf88de40e863ddd15f5ddb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a721d2781e07a89732d3d20fad0f25c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.8.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "fca3fd541651866ca8be0014b6f6871d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "213d106c9a92ea043e048451441b9588" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4bb5a3a413454cfddbe21d22acc3af92" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 189530112, |
|
"records": [ |
|
{ |
|
"name": "model.tok_embeddings.q_weight", |
|
"shape": [ |
|
92544, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 189530112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "701d12658eba426ff3ad374de70ae421" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23691264, |
|
"records": [ |
|
{ |
|
"name": "model.tok_embeddings.q_scale", |
|
"shape": [ |
|
92544, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23691264, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "27069ea050c6150d4048f35b11794840" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ed61c8c7fb68917c7025a86a9a6278e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0de7d1a5dda922d977710680843f02b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33062912, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.9.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.9.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.9.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 22036480 |
|
}, |
|
{ |
|
"name": "model.layers.21.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 22044672 |
|
}, |
|
{ |
|
"name": "model.layers.21.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 29384704 |
|
}, |
|
{ |
|
"name": "model.layers.21.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 33054720 |
|
} |
|
], |
|
"md5sum": "4b241dfaf1d2b5c95bbfb852d851d9a4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ccf30a64aa5340ddfe0c10facd046a05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "80b16ce84cd1709780f8c39efa38c2d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.22.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "a1f38e92e42141b59c5e7763f2678ae3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "324e756915aafbe6d7ad7f40b93487d1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "33b1d25e1efe700c24bddfd9b6650d19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f0833c8a1cfc610991630d286bca9bf8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.23.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "40a6002b0f3d911ae4d1d01e5809970e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "839c5b6c81e33831d7577d5039db568d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "db7c5c7fac0715b8288e3e1cbff2f7e5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.24.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.24.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.24.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
} |
|
], |
|
"md5sum": "fd58035636492ab98cbae48f657a523b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3b7ba761ee0f0adb8ec7b63ffe480ffc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1cffdacb7ea756e12b604108367d9d5a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.25.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.25.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.25.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "2e0a5a1bd67424bfd4d0f1b3705fdee6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4f35077de04c4f88ce49489ff88efb29" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "73baababf775f62478e2aac74782cd6e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.26.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "c956b25752bfdd000919b156626861ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8eee6030893ad8f94abe0bba09fa27b8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "fa628d5deb1498f3d6badc6c78f83739" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "de563eddffc3fb578d927ee8f47dba2b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33038336, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.27.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 11018240 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 19406848 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 20455424 |
|
} |
|
], |
|
"md5sum": "f5227600302a6579e108fce07121dcb8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ff10d951093eafd5b3422e95c264646c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c5777c8401d7b52cf6627a9de18cfd28" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22036480, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.28.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.28.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 8921088 |
|
}, |
|
{ |
|
"name": "model.layers.28.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 12591104 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 12599296 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 20987904 |
|
} |
|
], |
|
"md5sum": "6b6fa4fd6add6df5fa57f4962c84080b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "606d3de3a9e19c563ed9289e617ba89a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5c3074ae22c9f6d11d1a4f483fec76f8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.29.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 14163968 |
|
}, |
|
{ |
|
"name": "model.layers.29.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 21504000 |
|
}, |
|
{ |
|
"name": "model.layers.29.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "e863a8b4641e5ba83214f17278217ccf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0d3c0e94b9bbce90620a4824c1fc545d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2fb92b3d6e1ff3c3bba14fd08d44c3e6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30941184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 22020096 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 23592960 |
|
}, |
|
{ |
|
"name": "model.layers.30.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 23601152 |
|
} |
|
], |
|
"md5sum": "baec48cd0e99c6161e34593c48084723" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 58720256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
28672, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 58720256, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f19e577f4534038e6a2375f7a5b1e238" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27279360, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 3670016 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wo.q_weight", |
|
"shape": [ |
|
4096, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 3678208 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wo.q_scale", |
|
"shape": [ |
|
4096, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 12066816 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wqkv.q_weight", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12582912, |
|
"byteOffset": 13115392 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wqkv.q_scale", |
|
"shape": [ |
|
6144, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1572864, |
|
"byteOffset": 25698304 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 27271168 |
|
} |
|
], |
|
"md5sum": "9b55bfb7eeb43194a2fc42aed4f67081" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29360128, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.w2.q_weight", |
|
"shape": [ |
|
4096, |
|
1792 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 29360128, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89dd1cbb0ff728095f2b119ad6af7077" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 189530112, |
|
"records": [ |
|
{ |
|
"name": "output.q_weight", |
|
"shape": [ |
|
92544, |
|
512 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 189530112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a92ba6f2c20edca6d8daa05d1abcaa40" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23691264, |
|
"records": [ |
|
{ |
|
"name": "output.q_scale", |
|
"shape": [ |
|
92544, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23691264, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bf51bfc037e5750d6f1411169fd991a2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 11026432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
28672, |
|
128 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7340032, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.feed_forward.w2.q_scale", |
|
"shape": [ |
|
4096, |
|
448 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 3670016, |
|
"byteOffset": 7340032 |
|
}, |
|
{ |
|
"name": "model.layers.31.ffn_norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 11010048 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
4096 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8192, |
|
"byteOffset": 11018240 |
|
} |
|
], |
|
"md5sum": "4380bf176a871bbc0f369c829fbced7c" |
|
} |
|
] |
|
} |