{ "metadata": { "ParamSize": 195, "ParamBytes": 15475417088.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c85abeb33e635ed67c86be21688380ef" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.0.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "36159a52842a41f383e5e2e947b0c2fd" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f67347eeb93d880c34249ee0c0b71830" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9a0410f1337e9987bf8d6e304cad8879" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b0c605936a5ae0f6fff497467648643e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.1.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b513bc160d1c298ab33b485d64be65f8" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5d81ccb2da2cb44fb17c9b51c05fe5d4" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5dae230711711a10a52dffa50d870e6c" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9c8abff0efaa0aa024a73af1f773e9c8" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.10.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "273034e06c634e408d80184b9d0955ed" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cd063e7317b993a91f82cbad6cc0b781" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "311c9a9013126ea2a6fec972009001a3" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "20b896940c7a30037ca7bdcf5eb32316" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.11.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "eda5bed77cfc01d467f26801ed60a85e" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "39968b4b7966bd264248b7ef85bc5634" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5ac3649a5712d9a3682397e1fcd3fb12" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2f2f7937bf331c54fda6f3a44608e469" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.12.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7b15a854bf684f701c6fc93fb5914b97" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "99b51865865975495e79fea354ae5fdb" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2dd0b420d0bb371f11d0c0000ece6ee4" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b0adee27e6d8a686f5a6d5c2dd100e7c" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.13.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e170610c9bece6347867f3f20afad727" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fa99c22a34ebe69253916cb78b76e519" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4fec158428db8e50b96137c48504e022" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f79d06268ff762cc79dff080dafe9a39" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.14.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "787c610d91c4797804d8d77bd0de94d2" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ca0dbec60c156847e6a92dd6a58030b3" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "18567e92de98664536305394595ff099" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2c2c7cd0f9602b064266abad664b8392" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.15.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "dd77c999b52010bafc00a1105de55573" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "95568e71efdc31554511572ed293de5b" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "019bfb71c3991d0719d5b2d562392aed" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "73035c11a4521c1f07506edcf723fed3" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.16.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0283f1ceca602f0c248e74c0d27c614c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "de9fa681408c6d96cbeec23ab5b42e84" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "18ae561f1cc102f514532c7c988f40d3" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e0f315312c99d64e1e7323acd59e407d" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.17.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0c890df3a5b6a7701d9a95d5464c19c7" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9696ca1734540c16e68207c502ea42f1" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b115ee996174cababaabae3f56ad8b74" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0eb303e43e6469ef8578874c3e3b593b" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.18.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6d4b2e1d061f3a5da0cffd19119007a0" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a7ab36ff9c26b649006bbf6d48840146" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "35378beaa7a8a1366fbf2bd2abfc1bcd" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7cc624c2fc79cabd6a5e6ace634fb21d" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.19.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0e87aff1a01ebdd4f83336f927495543" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fd5683b8ee53c303c8efe30efa5f89b5" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bf0ee97a4aa5661ba2845e0f97ac71f5" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "90b12d2e8a0710207d950c56b8703548" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.2.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5a9c5e3eb7d3ac6535ddb1cc37868f6f" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7e3b8717ee1f17302e1933be8375a6dc" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7d18d7d8bdf6a9c1ba24608d601f3ae0" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fdf4f0103b73fd8d095fd8b97ea52510" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.20.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "88b0e6bbadaf0b61636a5223867e2283" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "788c50b61fea86fa59130bc108041829" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2e83262d7f71b7ca2a1e9597e8b32536" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.21.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "937d96f8d7d2e04fa78d60c6a47a8f10" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3b21b42a77d9c9c3aa39cb5ce4e8db0e" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.3.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "89a6e69e94e3052b7e3326aa61e5e45a" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6201da19326ed023d8db80806b5fa3cb" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "04e5f2458df36fef00a578d88bde9fc7" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1b8989afd09fa29bef5c39bb09f604be" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.4.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d8fa8ffca2298b3dc9b7a5b79de40256" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f58a3ab542620a09fb8206345cc460a1" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d1559c63806ae55994ec898db1a16636" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bd30d9ef0d5b82b166c6546df8b62afe" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.5.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3b2e3e2af6c57bdd0e65fc3a8c76d556" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f94d0ca517fd1c2ce7bd35d3bc60fb7f" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1ab93ff918032e1b54568ad3c1f23984" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0a364cdbdd1ed8f3950c0ddcd3e454e0" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.6.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7e709d99117382ed05b45c67e8a2d490" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c9b47529ea4a8cc2adb76431e24a9e3e" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c229dccc302924c47d74f41e1bda775d" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "70d6ab04af0f1c24f28938d1db318866" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.7.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c08dc6b29ec49efbddffa73de0e6bb05" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e9fe9e2969b987506e611cb8786d9f90" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bc27b26dfa54f1992b8150f51c5d9c9f" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4072cf852fdce54a6c199cc1d5134cab" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.8.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ed7faf35ee0fecfb98d32bf6d4a9e20d" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "82f345a81cbe7e58099796a9865179c5" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a3e2f962a494a9b84f00fc8a3078e946" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7dd1dee9afa6e4119595a3012fc799c1" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.9.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a0599ce34bdd47f38ab51985cc5ce529" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "10acf40cd05df0adf02334445d3a41f2" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0008f22d8fb20b35159f8430c03f6d14" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 758120448, "records": [ { "name": "model.tok_embeddings.weight", "shape": [ 92544, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 758120448, "byteOffset": 0 } ], "md5sum": "2e0c02cd641edda8cdf2363efed6acba" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1d833d03ea09acc66f9cfe3217b18cd1" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4a92ebeb76a009f860e4afb1796fd152" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d032c8fc44ee44df0968faa447e88071" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cb36d9c32d0576ec01bfa29e6401d1d5" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.22.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3b5e7ebd5e4ea32c7a890bc4d6ca40c8" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0b94a3a13ed036a577feae4ab29c027f" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f1b96b135705868c1b7115e5e544cea5" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cc73ef104cf9fe3a17c91b7c64269617" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.23.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0d02b2ec0c43a08d547a91e8022f6a3f" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6123ea6ce5d0f0f222bfe1dcacac88a9" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "72016e2fb7fb7df2d0db02a18ef66f66" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "344b62ac917b5a4a432d51f1c4426cc2" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.24.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "12dd41ea24a108b70ff66465feb0ce55" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "96fb0998e0994e5ff4ca2e63b3dda289" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2f442adaac0cf3a4b4245ac8025fb298" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "72a83163819d2d0e543c56376ac1c6ac" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.25.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "589e0d8969e7c54f98f7e3b62dcf4098" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4644244a602165984604072971a81302" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6163f8d259aa27b31370dcf1c944df1a" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4a1702b79aa50b556919220ff4c55705" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.26.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "00cfde8e1d690703fbdd06a23cd50ab8" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "73fde1321f858089aa75aca7df8f2629" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "592d0052043ce9e233d868d6f4926134" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2e6207aa7e2c6f2543d900499fd72b7d" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.27.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a74ceaa0feb415f62a133b08ac9e2c26" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "57a50a5b792bcfa4d487dfa76128a14a" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fbfa5b000d7b622c8998eb3e8fcc426b" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b5fe07f354306b7273e5deeabff78d75" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.28.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c013a4d9f58268e532a446f4076219a0" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6ea57319ec964fd39563155ed3b72dc0" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "316155f570183ceda21a9ec0a08c3411" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fbfe32718d2ce6898d6acf915272c91c" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.29.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7ca11eef7af84f2765cb44a9cd25d6db" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "716a6115a83ae2fe5fb9a845ee44a541" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2366fdb1599204c11aecb4c6c9823215" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d4b30e6819166a946681c7eadb487992" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.30.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "929bbc0837bbb9bef2552e56ef2d28ae" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1bbc955966489b5ca902e169a0f7bd97" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "976fce6de5c99454cb8372561c958dc5" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.attention.wo.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "20e3fb50b957b3f3234731925a95390d" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.31.attention.wqkv.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bd6d664e8455545532c69b376b7ae226" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.feed_forward.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5a6520ad5463ff230d8c4a128dc0231f" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.feed_forward.w2.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "dee15b5cc439b04fbd8bdb5b50965e1e" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 758120448, "records": [ { "name": "output.weight", "shape": [ 92544, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 758120448, "byteOffset": 0 } ], "md5sum": "95636dd014d4e967756b469d16285713" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 532480, "records": [ { "name": "model.layers.0.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.0.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192 }, { "name": "model.layers.1.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16384 }, { "name": "model.layers.1.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24576 }, { "name": "model.layers.10.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32768 }, { "name": "model.layers.10.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 40960 }, { "name": "model.layers.11.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 49152 }, { "name": "model.layers.11.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 57344 }, { "name": "model.layers.12.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 65536 }, { "name": "model.layers.12.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 73728 }, { "name": "model.layers.13.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 81920 }, { "name": "model.layers.13.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 90112 }, { "name": "model.layers.14.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 98304 }, { "name": "model.layers.14.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 106496 }, { "name": "model.layers.15.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 114688 }, { "name": "model.layers.15.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 122880 }, { "name": "model.layers.16.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 131072 }, { "name": "model.layers.16.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 139264 }, { "name": "model.layers.17.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 147456 }, { "name": "model.layers.17.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 155648 }, { "name": "model.layers.18.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 163840 }, { "name": "model.layers.18.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 172032 }, { "name": "model.layers.19.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 180224 }, { "name": "model.layers.19.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 188416 }, { "name": "model.layers.2.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 196608 }, { "name": "model.layers.2.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 204800 }, { "name": "model.layers.20.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 212992 }, { "name": "model.layers.20.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 221184 }, { "name": "model.layers.3.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 229376 }, { "name": "model.layers.3.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 237568 }, { "name": "model.layers.4.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 245760 }, { "name": "model.layers.4.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 253952 }, { "name": "model.layers.5.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 262144 }, { "name": "model.layers.5.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 270336 }, { "name": "model.layers.6.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 278528 }, { "name": "model.layers.6.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 286720 }, { "name": "model.layers.7.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 294912 }, { "name": "model.layers.7.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 303104 }, { "name": "model.layers.8.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 311296 }, { "name": "model.layers.8.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 319488 }, { "name": "model.layers.9.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 327680 }, { "name": "model.layers.9.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 335872 }, { "name": "model.layers.21.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 344064 }, { "name": "model.layers.21.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 352256 }, { "name": "model.layers.22.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 360448 }, { "name": "model.layers.22.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 368640 }, { "name": "model.layers.23.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 376832 }, { "name": "model.layers.23.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 385024 }, { "name": "model.layers.24.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 393216 }, { "name": "model.layers.24.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 401408 }, { "name": "model.layers.25.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 409600 }, { "name": "model.layers.25.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 417792 }, { "name": "model.layers.26.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 425984 }, { "name": "model.layers.26.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 434176 }, { "name": "model.layers.27.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 442368 }, { "name": "model.layers.27.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 450560 }, { "name": "model.layers.28.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 458752 }, { "name": "model.layers.28.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 466944 }, { "name": "model.layers.29.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 475136 }, { "name": "model.layers.29.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 483328 }, { "name": "model.layers.30.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 491520 }, { "name": "model.layers.30.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 499712 }, { "name": "model.layers.31.attention_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 507904 }, { "name": "model.layers.31.ffn_norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 516096 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 524288 } ], "md5sum": "5a303c6b7b2cd1e342730c7458b4784f" } ] }