internlm2_5-7b-q0f16-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
7c49bb3 verified
raw
history blame
103 kB
{
"metadata": {
"ParamSize": 195,
"ParamBytes": 15475417088.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c85abeb33e635ed67c86be21688380ef"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.0.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "36159a52842a41f383e5e2e947b0c2fd"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.0.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "f67347eeb93d880c34249ee0c0b71830"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.0.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "9a0410f1337e9987bf8d6e304cad8879"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b0c605936a5ae0f6fff497467648643e"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.1.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "b513bc160d1c298ab33b485d64be65f8"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.1.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "5d81ccb2da2cb44fb17c9b51c05fe5d4"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.1.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "5dae230711711a10a52dffa50d870e6c"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9c8abff0efaa0aa024a73af1f773e9c8"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.10.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "273034e06c634e408d80184b9d0955ed"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.10.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "cd063e7317b993a91f82cbad6cc0b781"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.10.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "311c9a9013126ea2a6fec972009001a3"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "20b896940c7a30037ca7bdcf5eb32316"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.11.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "eda5bed77cfc01d467f26801ed60a85e"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.11.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "39968b4b7966bd264248b7ef85bc5634"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.11.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "5ac3649a5712d9a3682397e1fcd3fb12"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2f2f7937bf331c54fda6f3a44608e469"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.12.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7b15a854bf684f701c6fc93fb5914b97"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.12.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "99b51865865975495e79fea354ae5fdb"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.12.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "2dd0b420d0bb371f11d0c0000ece6ee4"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b0adee27e6d8a686f5a6d5c2dd100e7c"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.13.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e170610c9bece6347867f3f20afad727"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.13.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "fa99c22a34ebe69253916cb78b76e519"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.13.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "4fec158428db8e50b96137c48504e022"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f79d06268ff762cc79dff080dafe9a39"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.14.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "787c610d91c4797804d8d77bd0de94d2"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.14.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "ca0dbec60c156847e6a92dd6a58030b3"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.14.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "18567e92de98664536305394595ff099"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2c2c7cd0f9602b064266abad664b8392"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.15.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "dd77c999b52010bafc00a1105de55573"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.15.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "95568e71efdc31554511572ed293de5b"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.15.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "019bfb71c3991d0719d5b2d562392aed"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "73035c11a4521c1f07506edcf723fed3"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.16.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0283f1ceca602f0c248e74c0d27c614c"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.16.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "de9fa681408c6d96cbeec23ab5b42e84"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.16.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "18ae561f1cc102f514532c7c988f40d3"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e0f315312c99d64e1e7323acd59e407d"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.17.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0c890df3a5b6a7701d9a95d5464c19c7"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.17.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "9696ca1734540c16e68207c502ea42f1"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.17.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "b115ee996174cababaabae3f56ad8b74"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0eb303e43e6469ef8578874c3e3b593b"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.18.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "6d4b2e1d061f3a5da0cffd19119007a0"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.18.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "a7ab36ff9c26b649006bbf6d48840146"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.18.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "35378beaa7a8a1366fbf2bd2abfc1bcd"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7cc624c2fc79cabd6a5e6ace634fb21d"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.19.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0e87aff1a01ebdd4f83336f927495543"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.19.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "fd5683b8ee53c303c8efe30efa5f89b5"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.19.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "bf0ee97a4aa5661ba2845e0f97ac71f5"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "90b12d2e8a0710207d950c56b8703548"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.2.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5a9c5e3eb7d3ac6535ddb1cc37868f6f"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.2.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "7e3b8717ee1f17302e1933be8375a6dc"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.2.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "7d18d7d8bdf6a9c1ba24608d601f3ae0"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.20.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fdf4f0103b73fd8d095fd8b97ea52510"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.20.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "88b0e6bbadaf0b61636a5223867e2283"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.20.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "788c50b61fea86fa59130bc108041829"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.20.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "2e83262d7f71b7ca2a1e9597e8b32536"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.21.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "937d96f8d7d2e04fa78d60c6a47a8f10"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3b21b42a77d9c9c3aa39cb5ce4e8db0e"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.3.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "89a6e69e94e3052b7e3326aa61e5e45a"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.3.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "6201da19326ed023d8db80806b5fa3cb"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.3.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "04e5f2458df36fef00a578d88bde9fc7"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1b8989afd09fa29bef5c39bb09f604be"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.4.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d8fa8ffca2298b3dc9b7a5b79de40256"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.4.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "f58a3ab542620a09fb8206345cc460a1"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.4.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d1559c63806ae55994ec898db1a16636"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "bd30d9ef0d5b82b166c6546df8b62afe"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.5.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "3b2e3e2af6c57bdd0e65fc3a8c76d556"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.5.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "f94d0ca517fd1c2ce7bd35d3bc60fb7f"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.5.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "1ab93ff918032e1b54568ad3c1f23984"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0a364cdbdd1ed8f3950c0ddcd3e454e0"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.6.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7e709d99117382ed05b45c67e8a2d490"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.6.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "c9b47529ea4a8cc2adb76431e24a9e3e"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.6.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c229dccc302924c47d74f41e1bda775d"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "70d6ab04af0f1c24f28938d1db318866"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.7.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "c08dc6b29ec49efbddffa73de0e6bb05"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.7.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "e9fe9e2969b987506e611cb8786d9f90"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.7.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "bc27b26dfa54f1992b8150f51c5d9c9f"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4072cf852fdce54a6c199cc1d5134cab"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.8.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "ed7faf35ee0fecfb98d32bf6d4a9e20d"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.8.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "82f345a81cbe7e58099796a9865179c5"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.8.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "a3e2f962a494a9b84f00fc8a3078e946"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7dd1dee9afa6e4119595a3012fc799c1"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.9.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a0599ce34bdd47f38ab51985cc5ce529"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.9.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "10acf40cd05df0adf02334445d3a41f2"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.9.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "0008f22d8fb20b35159f8430c03f6d14"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 758120448,
"records": [
{
"name": "model.tok_embeddings.weight",
"shape": [
92544,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 758120448,
"byteOffset": 0
}
],
"md5sum": "2e0c02cd641edda8cdf2363efed6acba"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.21.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1d833d03ea09acc66f9cfe3217b18cd1"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.21.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4a92ebeb76a009f860e4afb1796fd152"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.21.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d032c8fc44ee44df0968faa447e88071"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.22.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cb36d9c32d0576ec01bfa29e6401d1d5"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.22.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "3b5e7ebd5e4ea32c7a890bc4d6ca40c8"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.22.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "0b94a3a13ed036a577feae4ab29c027f"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.22.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "f1b96b135705868c1b7115e5e544cea5"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.23.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cc73ef104cf9fe3a17c91b7c64269617"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.23.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0d02b2ec0c43a08d547a91e8022f6a3f"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.23.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "6123ea6ce5d0f0f222bfe1dcacac88a9"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.23.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "72016e2fb7fb7df2d0db02a18ef66f66"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.24.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "344b62ac917b5a4a432d51f1c4426cc2"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.24.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "12dd41ea24a108b70ff66465feb0ce55"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.24.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "96fb0998e0994e5ff4ca2e63b3dda289"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.24.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "2f442adaac0cf3a4b4245ac8025fb298"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.25.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "72a83163819d2d0e543c56376ac1c6ac"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.25.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "589e0d8969e7c54f98f7e3b62dcf4098"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.25.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "4644244a602165984604072971a81302"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.25.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "6163f8d259aa27b31370dcf1c944df1a"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.26.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4a1702b79aa50b556919220ff4c55705"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.26.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "00cfde8e1d690703fbdd06a23cd50ab8"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.26.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "73fde1321f858089aa75aca7df8f2629"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.26.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "592d0052043ce9e233d868d6f4926134"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.27.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2e6207aa7e2c6f2543d900499fd72b7d"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.27.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a74ceaa0feb415f62a133b08ac9e2c26"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.27.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "57a50a5b792bcfa4d487dfa76128a14a"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.27.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "fbfa5b000d7b622c8998eb3e8fcc426b"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.28.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b5fe07f354306b7273e5deeabff78d75"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.28.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "c013a4d9f58268e532a446f4076219a0"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.28.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "6ea57319ec964fd39563155ed3b72dc0"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.28.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "316155f570183ceda21a9ec0a08c3411"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.29.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fbfe32718d2ce6898d6acf915272c91c"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.29.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7ca11eef7af84f2765cb44a9cd25d6db"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.29.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "716a6115a83ae2fe5fb9a845ee44a541"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.29.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "2366fdb1599204c11aecb4c6c9823215"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.30.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d4b30e6819166a946681c7eadb487992"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.30.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "929bbc0837bbb9bef2552e56ef2d28ae"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.30.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "1bbc955966489b5ca902e169a0f7bd97"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.30.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "976fce6de5c99454cb8372561c958dc5"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.31.attention.wo.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "20e3fb50b957b3f3234731925a95390d"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.31.attention.wqkv.weight",
"shape": [
6144,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "bd6d664e8455545532c69b376b7ae226"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 234881024,
"records": [
{
"name": "model.layers.31.feed_forward.gate_up_proj.weight",
"shape": [
28672,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 234881024,
"byteOffset": 0
}
],
"md5sum": "5a6520ad5463ff230d8c4a128dc0231f"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.31.feed_forward.w2.weight",
"shape": [
4096,
14336
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "dee15b5cc439b04fbd8bdb5b50965e1e"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 758120448,
"records": [
{
"name": "output.weight",
"shape": [
92544,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 758120448,
"byteOffset": 0
}
],
"md5sum": "95636dd014d4e967756b469d16285713"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 532480,
"records": [
{
"name": "model.layers.0.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 0
},
{
"name": "model.layers.0.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8192
},
{
"name": "model.layers.1.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16384
},
{
"name": "model.layers.1.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24576
},
{
"name": "model.layers.10.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32768
},
{
"name": "model.layers.10.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 40960
},
{
"name": "model.layers.11.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 49152
},
{
"name": "model.layers.11.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 57344
},
{
"name": "model.layers.12.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 65536
},
{
"name": "model.layers.12.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 73728
},
{
"name": "model.layers.13.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 81920
},
{
"name": "model.layers.13.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 90112
},
{
"name": "model.layers.14.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 98304
},
{
"name": "model.layers.14.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 106496
},
{
"name": "model.layers.15.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 114688
},
{
"name": "model.layers.15.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 122880
},
{
"name": "model.layers.16.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 131072
},
{
"name": "model.layers.16.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 139264
},
{
"name": "model.layers.17.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 147456
},
{
"name": "model.layers.17.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 155648
},
{
"name": "model.layers.18.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 163840
},
{
"name": "model.layers.18.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 172032
},
{
"name": "model.layers.19.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 180224
},
{
"name": "model.layers.19.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 188416
},
{
"name": "model.layers.2.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 196608
},
{
"name": "model.layers.2.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 204800
},
{
"name": "model.layers.20.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 212992
},
{
"name": "model.layers.20.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 221184
},
{
"name": "model.layers.3.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 229376
},
{
"name": "model.layers.3.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 237568
},
{
"name": "model.layers.4.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 245760
},
{
"name": "model.layers.4.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 253952
},
{
"name": "model.layers.5.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 262144
},
{
"name": "model.layers.5.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 270336
},
{
"name": "model.layers.6.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 278528
},
{
"name": "model.layers.6.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 286720
},
{
"name": "model.layers.7.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 294912
},
{
"name": "model.layers.7.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 303104
},
{
"name": "model.layers.8.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 311296
},
{
"name": "model.layers.8.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 319488
},
{
"name": "model.layers.9.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 327680
},
{
"name": "model.layers.9.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 335872
},
{
"name": "model.layers.21.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 344064
},
{
"name": "model.layers.21.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 352256
},
{
"name": "model.layers.22.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 360448
},
{
"name": "model.layers.22.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 368640
},
{
"name": "model.layers.23.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 376832
},
{
"name": "model.layers.23.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 385024
},
{
"name": "model.layers.24.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 393216
},
{
"name": "model.layers.24.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 401408
},
{
"name": "model.layers.25.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 409600
},
{
"name": "model.layers.25.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 417792
},
{
"name": "model.layers.26.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 425984
},
{
"name": "model.layers.26.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 434176
},
{
"name": "model.layers.27.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 442368
},
{
"name": "model.layers.27.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 450560
},
{
"name": "model.layers.28.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 458752
},
{
"name": "model.layers.28.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 466944
},
{
"name": "model.layers.29.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 475136
},
{
"name": "model.layers.29.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 483328
},
{
"name": "model.layers.30.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 491520
},
{
"name": "model.layers.30.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 499712
},
{
"name": "model.layers.31.attention_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 507904
},
{
"name": "model.layers.31.ffn_norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 516096
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 524288
}
],
"md5sum": "5a303c6b7b2cd1e342730c7458b4784f"
}
]
}