|
{ |
|
"metadata": { |
|
"ParamSize": 195, |
|
"ParamBytes": 7642159104.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 197001216, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
32064, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 197001216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48a04baa06c8140d1ae4fd61ea2a51b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2065d9f788567547a774f5ffb9f1131e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cb9728ad4fa6db615b1caad12129df66" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "93eb83c12f0c5e8a7c3108b394c91f84" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "608636bdc84e74e9c06edc92b184ad51" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "016c70a051cf51d54861397f6e389ba0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6fda1306b81ea8a098567dc6d73b273a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9fbb3dbb48f84112a1cfdd1a3dfc1309" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fe08a9496e9398f79dae924ceb0067eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50bd13a3e1b4f4f677030aa126d58003" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a4bda16fe3cd9cc9a45a679920864915" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a7c2b8f5bc96ef8751dace0f4943eadd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ebc48a19827d58234b2c8bf0681a127a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4b73dc6122a84c6529a5f0ba77b6f9df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.24.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8067191930fe75a1d628d5cd61a0bd37" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4600584c01f8b59b15bbb39157f83ea9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dee6947e469b73541a20200c4dfafc60" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48e123914a2e6b87cf0f84b551c39274" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.25.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c97246a9dc9042ec19934b2cf8b30594" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cef6f7e1c2aef1f38f38ba9f0cef500d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "491dd3fab5b55ea5b79732be5ac73ad9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0cf916f8ba3a5b21acb9979546b353c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.26.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a09a3bb9780b39448747ff97bfb01f77" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc70e157266b370a65b2260fb956ef05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e3049c1414aa0312b11252bc0df8e012" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a3cca75a3b6fab52c9e6d2925ed8c740" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.27.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a8a572beb8324e103d72a1c492b447c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "838f4f01a65960266efa7b3dcfcd6f62" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "98d5793d5b479999176cc86adf256624" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "54d4c13f9f91500f02e4e550a952fa5a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.28.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "07cb6d2348769db041143c3e0774ecab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b292bd2096b63ce0b3a69d8017346d07" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "710dfac38663642c90b81a90955e53f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "184397f1a9cfa93527bd0b47bfc0f4d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.29.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c0b0cdb47135abd8be76c8cdf519687b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26b1c36720773b1715560a3380cfe5e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "65ab4c77f8d88808327b115184c44056" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0a969f022b81189c9675ca624838bb7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.30.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f1118702f5f96cb08d2eadef0cd73a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f6391b80aa186aca2df8d816b3d5d20b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc354c852575d11406c100e4ecb6a360" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f75fd301b667261b7b5cd83e306c61f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.31.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e0774dd3e2453472fe59ba3797255c73" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 197001216, |
|
"records": [ |
|
{ |
|
"name": "transformer.embd.weight", |
|
"shape": [ |
|
32064, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 197001216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2794efed4f90da2679835636c49453c1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aca7213bce912376770af1f9a7805546" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7d45b5a1d117d0fb2f97c2566b85c080" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b29c48242ab28d7751664fd917cb53d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.0.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43ffb7ce1c9bb16f42f8cd92533105ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fba6c01b38085811a0b169a689e9a8b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "754739f276a532078fb8adf8a358a84f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b08e5972a3ee4dc0a5d2446c74b78667" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6a326d18c9a02a0ff844de3209d9d239" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5559680d21364578cee8b4f3b884c387" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b5a2560d04db862c4b0b44d480b3de11" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d7b4d5eb54d90c8b88cfcdcb7ab7a6d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a2f33fa311c565fc0ad4b0b2ac9c24cf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2ea00513628cede454214266723b73fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f031b6a01f57cb949b87939fe3a6fcf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7629386137cde4883fe6092961628908" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "74f94e083aa4730d2e928a652bb93aff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fe1dcf39db8e8c03468b5714d7574f1d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c55494e7965fcddc67886f2719fa4f28" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "01823150ed94ebda1ac03ff514674d59" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.12.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7606df9f1f8a7ae529f3c3d84812cfe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5efd0889d320559ec7d2587d88560310" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fde3986982d33d33d6f9033954c4f76b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2d6c2ac2c20271403136de2b690e2c10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f3e6c1bc7e706a4ca274baf2bb296a3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a6206e415bbb24b99496af3833bde153" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ce3c652a54a9d59b42dfd107c214fedb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08432e0cda4f3d4efb5af8226d292545" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4da8e23a88baea7d245115482c783b88" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "61fe1b44bf673ca96c6f5febd6dacaaa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d03015371c3aab576d7c554299a64a19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ba6b9594289c02e4bd8405905e3b9345" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "30b9eab0df51695103e0d2c8281853d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a87a68606ce19491b283ebfcb4f261e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3bb222858ea27693a823cc6e86b89394" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aafaa8e5e6341a8b186315f30b33eeaf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.16.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c0ec806ad19c5f93174e13e77343e9d1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fa41b02a47344ce12ce2462ba41cddae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8a5537e9896e145836ebf99dbb1d617b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "56aff7c581b6f3aa14835e0cf733a113" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bcb98fa15aca763650cfb90aad95ff8a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3d3b925b996f918f06902cb77831e82b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "006e594d0ec96f0a2b3824eeabc4405b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "73d5f7100db12528051f7f19aede3d60" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f9e0dad8db22348008c009241057bba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "64cba4f187a6c45a05dc06853132d7c4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a10d189b10ace93718ab187f3f78b402" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6a8801a5037374d1317431d8ed723218" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "329a57ff77416e0507f6fe9e662681e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "40578ab77b8ea1571d74b9a25d46d821" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e88194e0aed044702686ba4edd832d5d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "baca8426aae056bc7a3ed32f5f7fa3b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43077c2fdc9511c9bc7750d89ad53321" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "16c500aee75a3f6fadfb79388040ce59" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "35e566f071950f559d9a68ed651cdc0e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc4c11294dcef27da24c0d0c41cddb81" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.20.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6c09843d6224c0c491c11052703711d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5301ec982f0f0bee7cbfd66c5f10db1e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad69d774f1ef8d178b241ed77788034e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "71b49e1b1e72c73d95a73a80ff85cf56" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0a14557ee27469ea0c91accf21f1882a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f612196539cf259ff0ba018a1d3fdd8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "125b30cce73232acdd57017b751b8ec0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "174b431f56124195a88de449325221d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f47846816d3009156669ef65dd815b9c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.4.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fc5eab9b081abe8d88f8713d7340a33e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "57feb42a7dad4058566e19deebdacfdf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "122ada6175d0fb97ac9bd9c1c28f4a83" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4c27a252315d601d17df5b45c4ed394b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "73184c595beddb967a0a1fb4adb8a697" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e4e3eed244b4ffe0542a0e6e950d5c23" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4f529e42dabff29fb4432f509073c9e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48c4aca8ffbb252d17c5f5effb09a532" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8a0f2cc6cd07a2208027ced9f52c3ad0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9657e1f37ef7a3a950ff28e1b1c8d288" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6c071534349e4c2b500aa75b5f207769" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e49ccfc80aa0ffe5526847f8c955f83" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "db54d2b3c23b16af51bc756ecf127307" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f9ab4d31e88e01d5d03b3cc9de36a71c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "13f0af026f446e192ec681cae51413d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "084906da91b4132050bf217661c40b19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.8.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e5c8f3b8d7bc024599ea7c12e86e12b6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.down_proj.weight", |
|
"shape": [ |
|
3072, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eaad0fe95f377541e343e2b65c526166" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f32f91ad037860d6cd65911faac9277c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "442e739c1bd8c0ad4a7528a629d4f556" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 56623104, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.mixer.qkv_proj.weight", |
|
"shape": [ |
|
9216, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 56623104, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cba65d66e4a5aa25c15a78e798ac7d5f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 19273728, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 6144 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 12288 |
|
}, |
|
{ |
|
"name": "transformer.h.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18432 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mixer.out_proj.weight", |
|
"shape": [ |
|
3072, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 24576 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "transformer.h.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18905088 |
|
}, |
|
{ |
|
"name": "transformer.h.24.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18911232 |
|
}, |
|
{ |
|
"name": "transformer.h.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18917376 |
|
}, |
|
{ |
|
"name": "transformer.h.25.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18923520 |
|
}, |
|
{ |
|
"name": "transformer.h.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18929664 |
|
}, |
|
{ |
|
"name": "transformer.h.26.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18935808 |
|
}, |
|
{ |
|
"name": "transformer.h.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18941952 |
|
}, |
|
{ |
|
"name": "transformer.h.27.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18948096 |
|
}, |
|
{ |
|
"name": "transformer.h.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18954240 |
|
}, |
|
{ |
|
"name": "transformer.h.28.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18960384 |
|
}, |
|
{ |
|
"name": "transformer.h.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18966528 |
|
}, |
|
{ |
|
"name": "transformer.h.29.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18972672 |
|
}, |
|
{ |
|
"name": "transformer.h.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18978816 |
|
}, |
|
{ |
|
"name": "transformer.h.30.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18984960 |
|
}, |
|
{ |
|
"name": "transformer.h.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18991104 |
|
}, |
|
{ |
|
"name": "transformer.h.31.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 18997248 |
|
}, |
|
{ |
|
"name": "transformer.h.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19003392 |
|
}, |
|
{ |
|
"name": "transformer.norm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19009536 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19015680 |
|
}, |
|
{ |
|
"name": "transformer.h.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19021824 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19027968 |
|
}, |
|
{ |
|
"name": "transformer.h.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19034112 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19040256 |
|
}, |
|
{ |
|
"name": "transformer.h.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19046400 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19052544 |
|
}, |
|
{ |
|
"name": "transformer.h.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19058688 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19064832 |
|
}, |
|
{ |
|
"name": "transformer.h.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19070976 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19077120 |
|
}, |
|
{ |
|
"name": "transformer.h.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19083264 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19089408 |
|
}, |
|
{ |
|
"name": "transformer.h.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19095552 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19101696 |
|
}, |
|
{ |
|
"name": "transformer.h.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19107840 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19113984 |
|
}, |
|
{ |
|
"name": "transformer.h.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19120128 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19126272 |
|
}, |
|
{ |
|
"name": "transformer.h.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19132416 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19138560 |
|
}, |
|
{ |
|
"name": "transformer.h.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19144704 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19150848 |
|
}, |
|
{ |
|
"name": "transformer.h.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19156992 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19163136 |
|
}, |
|
{ |
|
"name": "transformer.h.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19169280 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19175424 |
|
}, |
|
{ |
|
"name": "transformer.h.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19181568 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19187712 |
|
}, |
|
{ |
|
"name": "transformer.h.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19193856 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19200000 |
|
}, |
|
{ |
|
"name": "transformer.h.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19206144 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19212288 |
|
}, |
|
{ |
|
"name": "transformer.h.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19218432 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19224576 |
|
}, |
|
{ |
|
"name": "transformer.h.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19230720 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19236864 |
|
}, |
|
{ |
|
"name": "transformer.h.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19243008 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19249152 |
|
}, |
|
{ |
|
"name": "transformer.h.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19255296 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19261440 |
|
}, |
|
{ |
|
"name": "transformer.h.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19267584 |
|
} |
|
], |
|
"md5sum": "f066d7c15dbf7a42f65846ff50a0ae06" |
|
} |
|
] |
|
} |