{ "metadata": { "ParamSize": 291, "ParamBytes": 39722299392.0, "BitsPerParam": 14.56926321506206 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.0.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7db413de247aca08fe9236a2e9090e48" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "59c77c1eb45dcbec5f24042796d014fd" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.0.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "66271a8bfdc881d530bddce423a16473" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.0.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "5e6ad79422b84ab6817e84ce1eae9142" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 1137180672, "records": [ { "name": "model.tok_embeddings.weight", "shape": [ 92544, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1137180672, "byteOffset": 0 } ], "md5sum": "c2c0dcdaa5fb65dc27c3af77f4be9222" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.1.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6a67d00fca1fb44ec6c145f25edcf09d" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0f3bce60bc68c6b0486ab25e032f1bef" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.1.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "a56910c6a122b400b1b78632935484e4" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.1.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "a78aaa58397280571207a00af537581c" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.2.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cbbc42abfea33905e066e923b4fa5615" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3fb14b1507ee68a3cb2da640bf5f1621" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.2.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "0c3060a8a29e05e7fee9983220c66142" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.2.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "7565e5436c7cebbe2d0da90a2d314ee7" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.3.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0df3bec71638aad48c3f6fdf43250582" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "cded647762891145b4f2708cfb0219ce" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.3.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "6c0da8fb8b983d34d16a700a4b026219" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.10.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3aa943f7c89f892a6ff2cc6c5dcf97bd" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ef650d89e9f5561640e879d433eb9291" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.10.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "a59efb340313927ff568bb83d6189269" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.10.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "140bc29d20ec55db204d76c7757910d8" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.8.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "a45245fb0d0dcfadfca2ebb59d27b11f" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.8.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "6089e29b51b9445b391472dba762a4ae" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.9.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "67f22370f2e77097c0dd66e71dbed781" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d0f49513aa0a3a998616d7fac6c31783" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.9.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "7f875669b4b20e1a195b3d6fe124b54e" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.9.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "b2abe5a96278a3aa700575d7c3974913" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.11.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "115da155e91aedc832328763ea5fe0db" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0f7ebef9ec340a3a76111f828a655fbf" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.11.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "2f98db679b023feef88e6e6161ca559e" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.11.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "91c1e7ace4cc6d5a811ffbe9e2091b82" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.12.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0c127b17b7f497e819bfad9366395efd" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3c8bbddb85a8a60221b60a36392dae0e" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.12.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "65e865d0402bd45bf47bdc53124ca7ab" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.12.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "f72453e64e6b45746f737ba3e873ad8f" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.13.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bf8df85329a7581d062da7a3c49cea76" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f4607212cc716ca83ed380c0b7d8226d" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.13.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "33e947cea8860a6d7ca378a3c7e8f47f" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.13.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "2d2fbaa70fe5504c6893bb00bd341b15" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.14.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "58a7fae456e781a6349d68341fd472e7" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "aad5026e5d5235e5095116244220c107" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.14.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "f2bd511d42a447d508b23d9fc00cb071" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.14.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "79364dac6fa081b3497ed2a0f9899321" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.15.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "376f15535d3f62aa3f0b6891546d6118" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2c414d31013d54a7372866bff2a3e905" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.15.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "7c41335c011514c3c6e3b6fc998f1dc7" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.15.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "8a450e3c401107275f5d32c1234b6aa8" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.16.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "deadbfc6d8875d5e6cdad4cefa3685fb" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "cbb9e3961a34277734f849e45866714a" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.16.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "f5a4ce98c5d2e52493925a19c7654adf" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.16.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "442cb65ef56d6751250fec205f32206d" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.17.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "996c44fc4a143c6d313a6c1ab33751db" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "03f8715bb6374a621fcc498bddafdca1" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.17.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "d00a5c827c10b9efd8b963c56db13cea" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.17.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "1117c3033d8d3ac0b0d5a30ab12fefb6" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.18.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "39fb853910df32a025a6eb9ca4a25473" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "34904a27391a85623384e5da29c38e60" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.18.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "2a3eaef32fc92ccb6871327c8b875f5c" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.18.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "63e9e39b6cff6bc320e6cea70ba69639" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.19.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a0b4ba9b944d8dbf07da9c0d087c1ba4" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "103771179c443ce4f4e28e7e06aebb20" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.19.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "7e687c9999b496d0009fd5635a48f33d" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.19.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "fc458af151395e439cac311eba2227bc" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.20.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4b75a822656f9d5ef019b4aebbb86568" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "72d2fe17844cf036d734bd591202e73f" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.20.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "0b3cebdf40ae164954edabaed15831b0" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.20.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "7fc53cdd069a070f875ea52d2f34a00d" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.21.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "22e5b73efc709a50a55bee29ac9057df" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4ce4eba3c2fe8fbebc02a092edecdd7f" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.21.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "de936b13fa07faeca154116205e9d2eb" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.21.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "7804e21972af4a0a98c4942116e28b6a" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.22.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5fce8a96d1bf435bd8dc33611ddbf449" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0fc9ce7067c18cf479452ef08dcecb3b" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.22.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "4271a3890431078cc5ec4be13cb273f1" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.22.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "d03f2f509167fa77b85e331257422ba9" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.23.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f026f30b4b7f1fcccaf64e6aaf938462" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "66d19ca9fee7430684548cce174d4e13" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.23.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "69a9e86d60c5ba00ff45ccf02794b67b" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.23.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "167de8311549bc8da52b1682e5bd7b44" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.24.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "779d094bb1c6cce6debfa64b638f5e61" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3ea567219c3af38665ed09b6d1b16e2d" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.24.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "2296ab0d0df0cc8d1a4bd3a8a5e06f4c" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.24.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "aa990ed8557bc5a8e003486d56f4e187" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.25.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "58b0ca9f82716be7e75b4fb7455b1e0f" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0c0aebabe5bacb9624c63a0c6a2a4982" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.25.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ae149d0cbec934811f4fd3a4d28ae2fd" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.25.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "35390a0c485b710883f77d26dc145867" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.26.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "abe647676c59ff8962a8c57567ddd83a" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "234c054cc614010c6798441f9f33617b" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.26.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "dbbab3c909bc282209abe32d6f29820b" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.26.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "43bca02513e73101ecc185d9e9efa6b5" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.27.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "95e85002434e9c1d8695f865ce51ebdb" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ed986c68569f42b1c0d0dc3f4ed884c5" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.27.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "64bf6132b570a610b7247f680862f2b0" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.27.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "97257a4bf41f98824cd5c30a4fa53073" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.28.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "394ed581bc5d2e2998d06a4c3a028b33" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "96899c819298dbdeb12c7789383ed44d" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.28.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "5bf4c2b3bf9c19b599d4d9c3bba6d73c" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.28.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "c1b9e2eda5bf8720513b8c8f65bd943a" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.29.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c0a1708ab31208cdd0c6d5363d6d8093" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "838bd986ce2b844f216547d20c13b95e" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.29.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "755c0156a106a48c4f09b4d20db1b948" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.29.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "f425cc44a1efd94696cc7e9de4ca49c4" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.30.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9386cb4e7eb3d5ee5a979bb7ac401c14" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "86a6705a6e9371f902e0be4c636bc4a5" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.30.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ddf2fad7986595d974215da056b5fbf8" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.30.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "df3f83c9f0b140cade318c7935ecfae6" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.3.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "1afed988133967734c5b986e67fa77f9" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.4.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "208154e4e96d7b69943d25dbaea017e0" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e93ef785e28cd634b7c9fa27a2302e89" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.4.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "dbb3f9f1c03e11b8979f5e5c71b97161" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.4.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "b2047ed1a305ce22e9033c3087ea6dd1" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.5.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9e4f4bbbd333758f10304c222b059adf" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "621902bde2f85d5f7b769d9edec80627" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.5.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "bd0ff996707fdbd4014b85e59a9902a7" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.5.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "4e26cb8217bba4d5146f3e379bc16a5a" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.31.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a7c7b64f3dfa86ce363887dbb9cd513e" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ad296d6a68401a33255105f1357b42b5" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.31.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "c5f0790a4f97440bd213fb188a3af04b" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.31.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "bb56c06177c45c5bd2918981fa0cc481" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.32.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "08b57ff23c3674814561cede73380547" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.32.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e58867d7ac6d2ec5c9b4ebea874cef29" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.32.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "df1b606f74f2f5be355a3520e19772dd" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.32.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "f1efc78efe9c02501a66cfb689cf6a3c" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.33.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5da48a915e2929f04ec1396efecb0c11" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.33.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d4d9c37a234cd8036d9ea0017d411ffe" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.33.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "af43da9fdb44e7935315c9cb21836bd0" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.33.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "c4d0cb7fb30a658097c55565769664a4" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.34.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f469789ef1e085ab677689b7a1676fbe" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.34.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9a0a4f84967d2300b4e53f2e756729e0" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.34.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "10365a52603934a5d50603ff4330703a" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.34.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "b8029e5306d1592191b2723a18600d45" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.35.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7556d983866e211fa47eb789514fb419" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.35.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "55d01740e75910478c7f74ec12a03d91" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.35.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "917a37e221e779cb91538f8688e2652e" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.35.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "bdd848bc9a8851a6bdd5267d82e93d1d" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.36.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "669c98169ce559d5dea183321e2d1f77" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.36.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7872deaa142c04564b60ecf747d6c64e" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.36.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "f3be3dc845e43f554795f99cdfcfe056" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.36.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "604a687ccc4eda5384c0cdd6bfb7ce8c" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.37.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c8d84ec208254fedf2a501cf11387b5b" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.37.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a65bb61f3410797558e33f35224da13a" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.37.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "6797a4c7ef7f2259e464bf163d050846" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.37.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "536829d2be23bd13a427d6bc71209e86" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.38.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0400791d56834f98233c6d49c0551cb4" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.38.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b0fb70adc1ae7a7ccce055f7e14a860f" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.38.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "d6edf85c6e98e21bc1c20e19711e9d0a" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.38.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "cf7deb1a5452e19b9555e838e76e8d53" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.39.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7aa85dd5098c8da2322d847f135c82e4" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.39.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2e3dadee443611c5c500e209923f40f2" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.39.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ae5ff02ddc94b63e22feafa08835eadf" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.39.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "cd40690067c1fc56e5691dc42f9573dc" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.40.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bca8b308788f3142f45b18eb32d79d01" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.40.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "557e4ad17874e2552be46fb22818ec1c" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.40.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "aaea8ca7014a58e446d285a8ddfefe09" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.40.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "ef63b0787d5a92af34ca536bb84f7164" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.41.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "05dcca9cd56a9636a9484df68b068351" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.41.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "89e72bdc3a6de41a45f81f8062bc9957" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.41.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "f3c856eb9e9de74f6d0cc852d3b8b921" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.41.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "83d2bb1e1ee58fa9e75650b7f166ece2" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.42.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9a07a3bb1028bdb25065cc80d6ff5371" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.42.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8274125b1c0b68ff93ef8b9d983a3033" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.42.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "8ee5fd9692c64c3db42bf23f398d26ce" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.42.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "3ecfb92f2e0dbb32a9eeca40890341ce" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.43.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3aa9885182a7319e2437f23d31342ef1" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.43.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fd5619a8fe09763661d26c3d561208a0" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.43.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "1d162302debd25d0738ebc27e3845f6d" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.43.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "e4a91e37cd73d9771f0582183591e19b" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.44.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bed296e48e01809cb8212c76825a665b" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.44.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "08b639d0c18fcbf7c1a63fb749c5fa9b" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.44.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "7709a967a0c53f0efc08eeb59241cd4e" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.44.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "143bc4d87bca35dc24a66f9c75c7e115" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.45.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "55ba57cff3ea364b21d309c6913dce62" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.45.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b915ec4f1a2bd7b9dd4caf2280583c7c" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.45.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "966fb0b1ad391b49d663ef61a7097c5d" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.45.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "ca0ae515b744050af3357f02553ee0a2" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.46.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "975731a3575a366de954484c040611d2" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.46.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a7a90fffbc267a8794a0d749a395466a" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.46.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "091009488a5c71860f1887ef965c433f" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.46.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "bbba14991bf109f5a4343bde92618bcf" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.47.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5af00b0e44df026910ce20f1ce1ea148" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.47.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ae2639212fe60abadedffff3988c2467" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.47.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "01f1fae460e95c1a2b4a4698d6491a7f" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.47.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "e35e3802e509e8248c9d0d65ffb4d84d" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.6.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "000574e1f5c50c785709e320aae224be" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ece6470c49ccf429481fac9d62cdb081" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.6.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "982b234ab6c6dbb3af157be8426f6448" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.6.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "7dbef8765e3ff403d9f3a26e33cbece6" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.7.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "63a11aa5b639ae8a4b92c37f8d3206e7" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5244855502245015ee7e5e293593aebd" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.7.feed_forward.gate_up_proj.weight", "shape": [ 32768, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "e81ede3b54e31a30cf854b89a1ff3944" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 201326592, "records": [ { "name": "model.layers.7.feed_forward.w2.weight", "shape": [ 6144, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 201326592, "byteOffset": 0 } ], "md5sum": "d44a76d8f61f376a4268aef32ed059d7" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.8.attention.wo.weight", "shape": [ 6144, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ef10714372f5ad230109ebaca8a4c09d" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.attention.wqkv.weight", "shape": [ 8192, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "97179e5140290259938be5fe5033d0b1" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 1137180672, "records": [ { "name": "output.weight", "shape": [ 92544, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1137180672, "byteOffset": 0 } ], "md5sum": "466cef09e5f4201900768e4ffb19c0d5" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 1191936, "records": [ { "name": "model.layers.0.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 0 }, { "name": "model.layers.0.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12288 }, { "name": "model.layers.1.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24576 }, { "name": "model.layers.1.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 36864 }, { "name": "model.layers.2.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 49152 }, { "name": "model.layers.2.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 61440 }, { "name": "model.layers.10.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 73728 }, { "name": "model.layers.10.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 86016 }, { "name": "model.layers.8.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 98304 }, { "name": "model.layers.8.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 110592 }, { "name": "model.layers.9.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 122880 }, { "name": "model.layers.9.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 135168 }, { "name": "model.layers.11.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 147456 }, { "name": "model.layers.11.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 159744 }, { "name": "model.layers.12.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 172032 }, { "name": "model.layers.12.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 184320 }, { "name": "model.layers.13.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 196608 }, { "name": "model.layers.13.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 208896 }, { "name": "model.layers.14.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 221184 }, { "name": "model.layers.14.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 233472 }, { "name": "model.layers.15.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 245760 }, { "name": "model.layers.15.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 258048 }, { "name": "model.layers.16.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 270336 }, { "name": "model.layers.16.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 282624 }, { "name": "model.layers.17.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 294912 }, { "name": "model.layers.17.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 307200 }, { "name": "model.layers.18.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 319488 }, { "name": "model.layers.18.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 331776 }, { "name": "model.layers.19.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 344064 }, { "name": "model.layers.19.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 356352 }, { "name": "model.layers.20.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 368640 }, { "name": "model.layers.20.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 380928 }, { "name": "model.layers.21.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 393216 }, { "name": "model.layers.21.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 405504 }, { "name": "model.layers.22.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 417792 }, { "name": "model.layers.22.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 430080 }, { "name": "model.layers.23.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 442368 }, { "name": "model.layers.23.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 454656 }, { "name": "model.layers.24.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 466944 }, { "name": "model.layers.24.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 479232 }, { "name": "model.layers.25.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 491520 }, { "name": "model.layers.25.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 503808 }, { "name": "model.layers.26.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 516096 }, { "name": "model.layers.26.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 528384 }, { "name": "model.layers.27.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 540672 }, { "name": "model.layers.27.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 552960 }, { "name": "model.layers.28.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 565248 }, { "name": "model.layers.28.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 577536 }, { "name": "model.layers.29.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 589824 }, { "name": "model.layers.29.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 602112 }, { "name": "model.layers.30.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 614400 }, { "name": "model.layers.30.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 626688 }, { "name": "model.layers.3.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 638976 }, { "name": "model.layers.3.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 651264 }, { "name": "model.layers.4.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 663552 }, { "name": "model.layers.4.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 675840 }, { "name": "model.layers.5.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 688128 }, { "name": "model.layers.5.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 700416 }, { "name": "model.layers.31.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 712704 }, { "name": "model.layers.31.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 724992 }, { "name": "model.layers.32.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 737280 }, { "name": "model.layers.32.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 749568 }, { "name": "model.layers.33.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 761856 }, { "name": "model.layers.33.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 774144 }, { "name": "model.layers.34.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 786432 }, { "name": "model.layers.34.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 798720 }, { "name": "model.layers.35.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 811008 }, { "name": "model.layers.35.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 823296 }, { "name": "model.layers.36.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 835584 }, { "name": "model.layers.36.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 847872 }, { "name": "model.layers.37.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 860160 }, { "name": "model.layers.37.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 872448 }, { "name": "model.layers.38.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 884736 }, { "name": "model.layers.38.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 897024 }, { "name": "model.layers.39.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 909312 }, { "name": "model.layers.39.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 921600 }, { "name": "model.layers.40.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 933888 }, { "name": "model.layers.40.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 946176 }, { "name": "model.layers.41.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 958464 }, { "name": "model.layers.41.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 970752 }, { "name": "model.layers.42.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 983040 }, { "name": "model.layers.42.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 995328 }, { "name": "model.layers.43.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1007616 }, { "name": "model.layers.43.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1019904 }, { "name": "model.layers.44.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1032192 }, { "name": "model.layers.44.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1044480 }, { "name": "model.layers.45.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1056768 }, { "name": "model.layers.45.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1069056 }, { "name": "model.layers.46.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1081344 }, { "name": "model.layers.46.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1093632 }, { "name": "model.layers.47.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1105920 }, { "name": "model.layers.47.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1118208 }, { "name": "model.norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1130496 }, { "name": "model.layers.6.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1142784 }, { "name": "model.layers.6.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1155072 }, { "name": "model.layers.7.attention_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1167360 }, { "name": "model.layers.7.ffn_norm.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1179648 } ], "md5sum": "80e313b0363be60fb7c8c56c2175d304" } ] }