illian01's picture
Add weights and config
537f54e
raw
history blame
55.7 kB
{
"metadata": {
"ParamSize": 105,
"ParamBytes": 7405330432.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 262144000,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
32000,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144000,
"byteOffset": 0
}
],
"md5sum": "cd25a0e6b4bc8161826fc5056e6cff2f"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "4cd4f63250b348906985186e5d19be22"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "318388d943058ca57632f381c2bbd592"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "a7070a67b57f475eeface96d35ad4e3b"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "5e2c18827553eb1655236ebb1d51b510"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "3c646522247ddd81debd288247a2820d"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "885a1d2ab386ec87801d642babd86c07"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "fd537fbddf068dee6c153e3c6ec3cbf2"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "95a9d95f44e81bac0ea7b7c6116f5dd5"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "53684986c85dd93648b9ec08b14c2871"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5df83f4f22ffb36896a9e7b50446c193"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "95ce9e6a676a66f27b6a71908f1f4694"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "a3398381a5c22f6fb7808dd243e8e145"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d70ec0e94f9dcb485f22e3fe983c7354"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b24aadfe362b9d3a4042d7a7b4342e2a"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "12668a6f09d295d0a91b0e0e48a39ac7"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e966aa20b847fb9aa4f2c0481846648f"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "424aee9252a40b7b497cf0344ecc6a90"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "18fb823cac1011607404fad78a050e46"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "0a721fe93e970c8f6f84797035764ec9"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "cbf0a0817e02f8af89103aac4adab678"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8ae44a030d303f649bdfc7d9219b8aa9"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "93e7e277431f7c6793d9d8a207c22a14"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "c4a327895700afc715899fa627b8e26f"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "3a37b974d511de656f2382ba62133f86"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "41a73472bd0adf44146f00e1e910116b"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "566ce9aff08c9f355b70221f29d39c62"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "e1ef3ef8e7d98aebbc66c4d174670505"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "679a13b88ee766d85d7bc95b3890073c"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "14aea661a9eaeb4b5e1672bf9e206304"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "31a5604a557bcbb9853758dca780b626"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "5339293fe19e18daad8d479ffcb1a724"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "4031ad352eec0120a63b31e0354e2da7"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a1236d981350f8db06e743c074b98502"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6ac471aea5dd9fa92ce5f121f0f58617"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "66729107c7d75122ca1f6fdf82e7cee4"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "05983e5e58c5e0d32ce86cc7e3f49ba1"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "147b5e8ad4fd080abf76fc9cdd9bd758"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d52eb5ce1f04a8ebd49912d11a3507f4"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "fbd33ccc1834c93d9c5c3f32f2c4ce7c"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "4882a5ed28597fc11ce1593c52a53ad6"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8f7a23394846c884152e60f47d10f170"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3b334be246094198941316e21ad437a1"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "43958cb1181eb394053172ac1eafe139"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "9f6e1185ec0a3d4a0db0ad110fb55372"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "fd27954abcaa68b4b6607d87a820c4d0"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "892df7b48193aeb6f51f0553a94884c9"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "ca178abda39ca9c02fb16ec492541c4f"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "ad74166a2eaa93ff6395a5e8a6354c8f"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b7397a0857e8de6657a855119a20d71d"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "71f419719ae8c138464ca91907c9d97b"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "ceb903e130b1008ff393efcb2f146128"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "993fc79b4fdb810ec65ddf09d45e2716"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "39253307df27775f7bcf926bada7497d"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ba542d70e312517be53f8183fe4dae81"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "4cefbdbcd104420f62edcceb1b96057c"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "6e932be356dd90bf22e2693aee7ac2d2"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "1e0e808d0064d94b1c5da59860e8db24"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "466759ff2429658c910c37f5bc9d20f1"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "88e97b46efae2a3da238d65fde1adb61"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "cca00bc36ca644e07adcc2a8f6a24114"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "c9e3ded76bc30ad58eaeaa925d41aee9"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d49c550cd19e9f47e245fabe0f590ae7"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "fc552dee542f6ebb7ed590a74cb18681"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "bb873eb18ba5651993626f73d913e5e1"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "67f33745c9844f82f59d64f60e9a78aa"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e16bf94b5970adde5a727c8e3c9d60de"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "c1ffa40ea7bdf607bcb9322924cd7637"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "350720761de687e2e5b266c15be1be12"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 262144000,
"records": [
{
"name": "lm_head.weight",
"shape": [
32000,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144000,
"byteOffset": 0
}
],
"md5sum": "c4453fd8afd0e9397475a1626c5b5940"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 286720,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8192
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16384
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24576
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32768
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 40960
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 49152
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 57344
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 65536
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 73728
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 81920
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 90112
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 98304
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 106496
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 114688
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 122880
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 131072
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 139264
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 147456
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 155648
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 163840
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 172032
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 180224
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 188416
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 196608
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 204800
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 212992
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 221184
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 229376
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 237568
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 245760
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 253952
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 262144
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 270336
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 278528
}
],
"md5sum": "e774a2d70024de12f2a25b167b4e89b7"
}
]
}