{ "metadata": { "ParamSize": 105, "ParamBytes": 7405330432.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 32000, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "cd25a0e6b4bc8161826fc5056e6cff2f" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4cd4f63250b348906985186e5d19be22" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "318388d943058ca57632f381c2bbd592" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "a7070a67b57f475eeface96d35ad4e3b" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "5e2c18827553eb1655236ebb1d51b510" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3c646522247ddd81debd288247a2820d" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "885a1d2ab386ec87801d642babd86c07" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "fd537fbddf068dee6c153e3c6ec3cbf2" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "95a9d95f44e81bac0ea7b7c6116f5dd5" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "53684986c85dd93648b9ec08b14c2871" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5df83f4f22ffb36896a9e7b50446c193" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "95ce9e6a676a66f27b6a71908f1f4694" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "a3398381a5c22f6fb7808dd243e8e145" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d70ec0e94f9dcb485f22e3fe983c7354" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b24aadfe362b9d3a4042d7a7b4342e2a" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "12668a6f09d295d0a91b0e0e48a39ac7" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "e966aa20b847fb9aa4f2c0481846648f" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "424aee9252a40b7b497cf0344ecc6a90" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "18fb823cac1011607404fad78a050e46" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "0a721fe93e970c8f6f84797035764ec9" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "cbf0a0817e02f8af89103aac4adab678" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8ae44a030d303f649bdfc7d9219b8aa9" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "93e7e277431f7c6793d9d8a207c22a14" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "c4a327895700afc715899fa627b8e26f" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "3a37b974d511de656f2382ba62133f86" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "41a73472bd0adf44146f00e1e910116b" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "566ce9aff08c9f355b70221f29d39c62" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "e1ef3ef8e7d98aebbc66c4d174670505" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "679a13b88ee766d85d7bc95b3890073c" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "14aea661a9eaeb4b5e1672bf9e206304" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "31a5604a557bcbb9853758dca780b626" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "5339293fe19e18daad8d479ffcb1a724" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "4031ad352eec0120a63b31e0354e2da7" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a1236d981350f8db06e743c074b98502" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6ac471aea5dd9fa92ce5f121f0f58617" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "66729107c7d75122ca1f6fdf82e7cee4" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "05983e5e58c5e0d32ce86cc7e3f49ba1" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "147b5e8ad4fd080abf76fc9cdd9bd758" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d52eb5ce1f04a8ebd49912d11a3507f4" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "fbd33ccc1834c93d9c5c3f32f2c4ce7c" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "4882a5ed28597fc11ce1593c52a53ad6" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8f7a23394846c884152e60f47d10f170" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3b334be246094198941316e21ad437a1" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "43958cb1181eb394053172ac1eafe139" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "9f6e1185ec0a3d4a0db0ad110fb55372" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fd27954abcaa68b4b6607d87a820c4d0" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "892df7b48193aeb6f51f0553a94884c9" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "ca178abda39ca9c02fb16ec492541c4f" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "ad74166a2eaa93ff6395a5e8a6354c8f" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b7397a0857e8de6657a855119a20d71d" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "71f419719ae8c138464ca91907c9d97b" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "ceb903e130b1008ff393efcb2f146128" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "993fc79b4fdb810ec65ddf09d45e2716" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "39253307df27775f7bcf926bada7497d" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ba542d70e312517be53f8183fe4dae81" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "4cefbdbcd104420f62edcceb1b96057c" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "6e932be356dd90bf22e2693aee7ac2d2" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1e0e808d0064d94b1c5da59860e8db24" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "466759ff2429658c910c37f5bc9d20f1" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "88e97b46efae2a3da238d65fde1adb61" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "cca00bc36ca644e07adcc2a8f6a24114" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c9e3ded76bc30ad58eaeaa925d41aee9" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d49c550cd19e9f47e245fabe0f590ae7" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "fc552dee542f6ebb7ed590a74cb18681" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "bb873eb18ba5651993626f73d913e5e1" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "67f33745c9844f82f59d64f60e9a78aa" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e16bf94b5970adde5a727c8e3c9d60de" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "c1ffa40ea7bdf607bcb9322924cd7637" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "350720761de687e2e5b266c15be1be12" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "lm_head.weight", "shape": [ 32000, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "c4453fd8afd0e9397475a1626c5b5940" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 286720, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16384 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24576 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32768 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 40960 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 49152 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 57344 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 65536 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 73728 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 81920 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 90112 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 98304 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 106496 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 114688 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 122880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 131072 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 139264 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 147456 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 155648 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 163840 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 172032 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 180224 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 188416 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 196608 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 204800 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 212992 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 221184 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 229376 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 237568 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 245760 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 253952 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 262144 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 270336 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 278528 } ], "md5sum": "e774a2d70024de12f2a25b167b4e89b7" } ] }