|
{ |
|
"metadata": { |
|
"ParamSize": 313, |
|
"ParamBytes": 4284263424.0, |
|
"BitsPerParam": 4.500503319461262 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 272498688, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
152064, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 272498688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd1ad9a64be1f087b8b8fb7aaa92c471" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 34062336, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
152064, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 34062336, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78240fe50127cb57a39db6e78e085fea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 272498688, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
152064, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 272498688, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b102e89d2f4def237af7a5db57118903" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 34062336, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
152064, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 34062336, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "91acaa1cf63ef1c851f7d5a110324340" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee710d8aeec8c914eb729b282d8557de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "471d4dc43bd5027d3d7d16b9803975f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ebfbf499a0a90e1d6008c3d7e3602d3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "45bd8e0519624d1566a1b0c00c598d77" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33519616, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 7168 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 4250624 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 12737536 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 12744704 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 12753920 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 21011456 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 22043648 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 28466176 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29268992 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 29276160 |
|
} |
|
], |
|
"md5sum": "5ad2db354f331aaaa4b0484c7b230fce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5c28c927aeed75fa5f259da4447e5c95" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "77f537fcc08c24211b0fdbfffc7618dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 8486912 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8494080 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 8503296 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 16760832 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 17793024 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 24215552 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 25018368 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 25025536 |
|
} |
|
], |
|
"md5sum": "538f9333deb17e3f2409290896810460" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d6e0864b7a1b05984aa1c6b7b1bb830b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b3a9c9e1b1ddb10f7f94d1036e41517f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 8486912 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8494080 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 8503296 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 16760832 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 17793024 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 24215552 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 25018368 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 25025536 |
|
} |
|
], |
|
"md5sum": "6c288e8de181fdb49ea5373e567aef1c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b86ead93765dfddb5bc5053c4698f084" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89dd0b2229b9081b080e83550db3c74d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 8486912 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8494080 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 8503296 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 16760832 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 17793024 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 24215552 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 25018368 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 25025536 |
|
} |
|
], |
|
"md5sum": "22c964423318052d691c5e3ed5d1d738" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e8acc8a5ac995bb21e61e0fc6dc3ffcb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ca6f2adb44bbe6842d229945c7dc7b04" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 8486912 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8494080 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 8503296 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 16760832 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 17793024 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 24215552 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 25018368 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 25025536 |
|
} |
|
], |
|
"md5sum": "0a58de7f2bd16733d779f04bc61c5613" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "131a0382e7df3879da8a42b3395502de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d14f1c0a61a7caf61b348c44c38ffce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 8486912 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8494080 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 8503296 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 16760832 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 17793024 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 24215552 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 25018368 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 25025536 |
|
} |
|
], |
|
"md5sum": "6571030a5f5064e1ea9182d4e3affb51" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "618907db5e7b791dcaf73d0fade21941" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b853ff62c3ed480c532a065bc6770297" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 8486912 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8494080 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 8503296 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 16760832 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 17793024 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 24215552 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 25018368 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 25025536 |
|
} |
|
], |
|
"md5sum": "a12e349db044d08a649fbcb0d4ddc72e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33285120, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 8486912 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 8494080 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 8503296 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 16760832 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 17793024 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 24215552 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 25018368 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 25027584 |
|
} |
|
], |
|
"md5sum": "0bd40546de716e3e3ce23a3fbf444cce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "66f31f528a25d33a7776c8945daeca1e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1753590d20467666716e8e5465510808" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30301184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 1032192 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 7454720 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 8264704 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 12508160 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 20995072 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 21002240 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 21011456 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 29268992 |
|
} |
|
], |
|
"md5sum": "589e703deceebeef3b26f6000a3b222d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b91dcffeab6bce4e6b6c645b31b7834f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eb0a4c840cbcdce1ccb82d75eedd721a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 6422528 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 7225344 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 7232512 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 11475968 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 19962880 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 19970048 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 19979264 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 28236800 |
|
} |
|
], |
|
"md5sum": "0d3d7bddb49134917a5a8f7effbb70d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f3acf2960a22b828d451400f894298e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96788fd0a634e358224a18fb4591e056" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 6422528 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 7225344 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 7232512 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 11475968 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 19962880 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 19970048 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 19979264 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 28236800 |
|
} |
|
], |
|
"md5sum": "1beaca350959c86f8e818ca5057f27fc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43600e11ab9970f13e13ca7449b5955b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c4fab44e82748a7a5b1a6aa6f4169eca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 6422528 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 7225344 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 7232512 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 11475968 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 19962880 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 19970048 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 19979264 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 28236800 |
|
} |
|
], |
|
"md5sum": "454256de88d94e2cc984c943ddd861ba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c7e93607fcfd1aebfa54b9d217c7396a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "14f90f6de1e838117d71412e706ccf40" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 6422528 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 7225344 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 7232512 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 11475968 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 19962880 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 19970048 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 19979264 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 28236800 |
|
} |
|
], |
|
"md5sum": "d55e5c1c0ebe743a5f45221284608f42" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1d46784790fb63e46414edeb80520d3f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b8d31f0d78499aaa48f40d67db5e646e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 6422528 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 7225344 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 7232512 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 11475968 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 19962880 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 19970048 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 19979264 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 28236800 |
|
} |
|
], |
|
"md5sum": "9c30973a2670fdf0d4edd1614809c5c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ae8929028b1ab58f9888b816eb19b2bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "68db58d99cb693eac70b87c8bfaf97fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 6422528 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 7225344 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 7232512 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 11475968 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 19962880 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 19970048 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 19979264 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 28236800 |
|
} |
|
], |
|
"md5sum": "28288d498cba750e2c833762a6082e28" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f17689c71c1f9e52333b35e11e6620fc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a4edb0a7a6d136c01ceb5399413ef4c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 6422528 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 7225344 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 7232512 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 11475968 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 19962880 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 19970048 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 19979264 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 28236800 |
|
} |
|
], |
|
"md5sum": "6404d8ea49f8e947954f411432baeea2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "49a0a15c14acd524da4599af46dfd7a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cad0b886ffc20a263ab1e8c17e29574b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32243712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 6422528 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 7225344 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 15712256 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 15721472 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 23979008 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 25011200 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 31433728 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 32236544 |
|
} |
|
], |
|
"md5sum": "8cd629c68a1a8bb4164fd58b6c989b43" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "84acd36cfbb3da01521ea1de8672c769" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "040e3d369e95794019f192e0df993d5e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1e959ef609517921a165bdb95b0f49db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25491456, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 4243456 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 12730368 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 12737536 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 12744704 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 16988160 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 25475072 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 25482240 |
|
} |
|
], |
|
"md5sum": "4fb3865deffe5980fa6d4df2c8b959b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "675b7f59fc981220b4ed037eeb916524" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c45d9c62f884058da49319a20c090b83" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "29af8ca58690e654041e551fb028e1bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33526784, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 15712256 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 16515072 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 16522240 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 20765696 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 20772864 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 20780032 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 25023488 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33510400 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33517568 |
|
} |
|
], |
|
"md5sum": "a664eeb83fc99dae2ccde376ac6561c4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c338e1078d3ee9d72dc17c48ee7cec23" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "588119f0db1c989d471e7680ca74f36c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 15712256 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 16515072 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 16522240 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 20765696 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29252608 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29259776 |
|
} |
|
], |
|
"md5sum": "2f740748b62c3da5e0bee1cb53087d4b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0a0ed33ac7c33e133fd885fd2a58affa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6c128d022845e14d3a2dea583f7a7c0b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 15712256 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 16515072 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 16522240 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 20765696 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29252608 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29259776 |
|
} |
|
], |
|
"md5sum": "582be67780507d3fcfe5b8a650947b5c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a72daa2b9c3b7fe76049ffaa1dd4a182" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be9602f8dd5e99938685211f26b5772b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 15712256 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 16515072 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 16522240 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 20765696 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29252608 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29259776 |
|
} |
|
], |
|
"md5sum": "fb296b236082653f2f8e507753f6c6de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a6bdec306334b6b96402b2c09c21bcef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fef6959e75288d21aa398bb543c70a90" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 15712256 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 16515072 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 16522240 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 20765696 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29252608 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29259776 |
|
} |
|
], |
|
"md5sum": "c9429acd01edaca67ee4ddf5a1f0bfec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ecd9d1ac0140dab1f74673dcc99bb8fc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e15630a3b420415b5ff57384de08224d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 15712256 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 16515072 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 16522240 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 20765696 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29252608 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29259776 |
|
} |
|
], |
|
"md5sum": "baa49f5a0c23ecaffa8a12ee2c173504" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c8a097ab44cc9864b04d9cdefb7904d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6687f0efd461c2b1735646dd7c983421" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 15712256 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 16515072 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 16522240 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 20765696 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29252608 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29259776 |
|
} |
|
], |
|
"md5sum": "7e3a5f09e7588a498f0615fbbe2784e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6fad4af335a4f28de3584716a4a658cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b6bc51b89e945f747b53d2e1acc91c4e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 15712256 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 16515072 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 16522240 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 20765696 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29252608 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29259776 |
|
} |
|
], |
|
"md5sum": "4d01c807c01932f78ef814638dcf21af" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33947648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
2368 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33947648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "22ec0f9e7b012fbd472fba2d369cf038" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67895296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
37888, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67895296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "85db2a124c9ba20e9f2f32632f5b37e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29268992, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 15712256 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 16515072 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
592 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4243456, |
|
"byteOffset": 16522240 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
37888, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8486912, |
|
"byteOffset": 20765696 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 29252608 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 29259776 |
|
} |
|
], |
|
"md5sum": "49d0780d91dea760f24abac81cbd667c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16522240, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.q_weight", |
|
"shape": [ |
|
4608, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8257536, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.q_scale", |
|
"shape": [ |
|
4608, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1032192, |
|
"byteOffset": 8257536 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
3584, |
|
448 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6422528, |
|
"byteOffset": 9289728 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
3584, |
|
112 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 802816, |
|
"byteOffset": 15712256 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 16515072 |
|
} |
|
], |
|
"md5sum": "09ab8528aeb57d430892ca1941f9c86d" |
|
} |
|
] |
|
} |