{ "metadata": { "ParamSize": 451, "ParamBytes": 65527752704.0, "BitsPerParam": 12.329999342718688 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1557135360, "records": [ { "name": "lm_head.weight", "shape": [ 152064, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1557135360, "byteOffset": 0 } ], "md5sum": "8bf83241e43ac68721913d2dfa887427" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.62.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "3a2cd1521a89755d087c7ba2fa591a22" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "95b9a88dcea26230448ec154fd309afb" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.63.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "4fd13fa67b9d5f79cc6485ed3d28979a" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "c1c5ffbdad9f682c66f72cab3b2f9402" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.63.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "8ec877c0d41243d26b338f0562d4abb6" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.63.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "dbbe3ea33a7aa6d151f8bc502ad9b64f" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 1557135360, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 152064, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1557135360, "byteOffset": 0 } ], "md5sum": "cc0c1ba518eaa3a45514e23abe4299c8" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "750a88fb02cce3c9f9919e42baba12a6" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "da377e8667831dd98d0d0c22547bd75d" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "5bdf20548218be77a7cc28ded0da6a22" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "626a8765ebdfb72e7d3324dada211c73" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "38b458ac7408ee93886b6b2d082c4b40" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "26e1f1547c884f3d6170c7521d568d24" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "ecb48889a46e0cd9705965dea8ca8b78" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "733627a24c80d698dea227bed0290726" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "a380a25fddf0adeee2453f41b1dc3984" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "bc8f376d52c6a428062337492099bdfa" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6a1658a6cd3a731449eb5a906cc7fc5a" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "340f1c8e1a808db4cc39c006d912f217" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "94a8ed1f476a15cd98dcf7f4a772e2e5" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "0e687e6b0f756040ce7243a415e612cf" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "2a01718352968cdf4a68a9678c972aef" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "cac3fe21d05f4aa6aaf787987be8689e" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "1857b413f6bd10e9888a6026d8f1809b" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "15f10db579cac98acf81a3a2180726f2" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "c0ca940488253a2b8fa0d083623556a6" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d4cbfcc5f41e2adc544406cb0ce4ab5e" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "3f6a60bec1f40d4c7244c28ddeabd043" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "7cff4b6593c52592af05e39cfba9ab82" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "94cb0a9a50dd7b458bbbea475ea99343" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ca164b21bb804f2bb6d49875e99286a0" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "b7a5d09f3e0a9ce96e4a0435ce7df127" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "8145db65e2d1d6dad0a26f5d89b68bf4" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f3f5fdf5b130a7496a2aa2f2ed95ca93" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "74bf77a8b0b6bb746263bec57a06544f" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "b2a43d7b3a15e059fb78bc3ce78d1c57" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "38c28e49c3ab8e7c4796ca9d9f469166" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "2f324f045f7aa9d5ee601fc62aeed7e0" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "44191dad41262303d0b6e9d6553b35f2" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "660f71a6cb7b671ca561ae031780d3b5" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "8f0b35a54f22264278ceeabc5cc149f2" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "a8ae995a159da72c7a5126b5a73676ea" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "70e63f491cc05a5c088be46e30d7a237" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "43116a9325c85d861ee524cc54c3285d" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "be602943c36425031049441fdf367ce6" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "fefc52a122bb90b9e4160d4f21083282" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "44c326edafee467efbc3bcc081d152ea" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "cf327a42bf8ad722518ec1f5065bc144" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "815db9399848debf0289298735fd1e9a" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "3183e28451248be86a0503d29cac0ba8" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "0668b933d0df82b676db0207a6f0dfe8" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "72070d68b4457530588912b42136553e" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "54e25f7d5622098ec4816ccba2fa8a40" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "101194e3f29a6fa7fca3392ae71a6e99" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d020377a2d58c9a5a88b0bb9d3d6725d" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7d9305def91c159483fc90a50d8761e6" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "77312040fe72c0e759162f4da92d8b4e" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "69c033460bed6b238d95167a8a373a11" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "df321c1badefdf81f1ae8e021af33a6f" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7d1dc778a6d0a1391dba239a3422d0fc" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "4e6914b150b6ede90e7d573f97d3fa88" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "14d32a094cb85b8d21d62eed56e9bc9f" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a5b2444cd06bc8370c1b2510955eb858" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7631a2493f512c0866b1894af0f3f959" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "9a8d1a5b0ae17c691a3f47bd6dd577af" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "515b6d77a5a19fe03a83aaf496c9c837" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "0170e0410ea94109e16b9a91fee0003d" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "0fa558ffd19835d3a02133f782b9768e" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "efb9eef8ae6e3fc857f865f95314c503" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "fe19ba3901cb7af7ec8a07e2940338b2" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4af2bc50b8cd4b5ba6f1e1df25eeb84d" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "480663ab7ea11557bdb28e9052df7f69" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "90b584b5015ee5243d5c352944695163" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "48f3874237b80f428f1195b500ee5435" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "b71f052deed39570d4139917a1d698a2" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e05bcc9e16b866eef42aa09105f3d271" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "a1dc616584480f814cbcc5da0b56224c" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "e305c78d267a1e20e2a2fa849f67c56b" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "9031feff0ea7fdb40cb727ba244f34bb" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "2ad41401f287b6a24ac57daed89fba87" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "4abe4f498c949dead7cdef9a9e87a22f" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "fc8c3e78b21d686dc7d939a2d08105e2" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c9c6e287473a7851b8e7684c2a115024" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "abcab40cbc31e0ed742745421e4b65b4" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "938d107f0afd73606bffb7f00f44ad25" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "d788d3d8ae166f0a619c88a3711a924c" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "62e1244d26b917c3c4d0ca004cba8e35" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "75c653de3e52fc99520ce3e29905f80e" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "d51ea58285691f611d78cd02ce26c0dd" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "ebd9332447c57517cfeac7593a958743" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "0bfab887063b835b606c70898032b220" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6c210f559d703f4592ee1b437c292749" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "0b32060a374a6fd5cb27c4f0a1948ac2" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "bb0e294734767c2cd14b75b3cdefd5e2" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "3b4085746957cdaaadc0f9fcd0c8d815" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "a0e56966dae73cc347e3439fc7bd956b" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "878d62b08e4679074bd287d54132ce6f" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6208900eca0ad97d0f973ca6b73a9467" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "7a10c872f2e245573a11f489328cb86a" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "617800a9792578ea58626cc97e007069" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "4edef1f89fae9c9a3a0fb4cf7d0ba9fa" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4502cdd09003027c2f76d3bf754e9253" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "caf2c302f6c233b1d877ba3dac853b8c" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "aae1d1063e0f26fc31bcbc61ed964c7a" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "091048af5fc512f2a8b0cd5eddd330a5" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "92040f85317411dd7792387e734b0c87" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "35e7b9a0f2306d1eaa1690abb064936e" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "7f10c420b1401401082906bef611e27e" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "0e3e3499feab3e009c3c40551e507752" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "efe4cc48e7a69e52f9de75d80edd26d8" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "0e7d7675d9750fe2f1fb07b940c31c0d" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "c4537ef2899fef8356eb40619f9b5a51" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e3e98fa2cb8fcaffe09b196353963bb0" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "8c5cd2d2c7b6117b8724b108578dcd94" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "76c9392eecacd28d1c8a960859112f54" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "43dbfe44305d0a296bb6da74255f597b" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "104915a0f842b7a0419101832ad3cbc7" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "1f5a7b6d42cb544feea0db40fc284fc2" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b823fdd97ad9ef4996dd8ff96ab05cc5" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "8d643b15b5a2d4eba07b550da3ac3e5d" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "10eeae436effdf1384e3907950f9357d" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.28.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "36178a237622b0bcb58b75b770750df0" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e6e6df278a289f7b566641083311d4b5" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "f2ccdc26383173b03130d3ef526d708d" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "810b35df6bbad5fbdabc72e279cf821b" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.29.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "058cf853f014928a58df7997d9c4de5c" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "a511497ea7ad26022566ac65066068ce" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "9b147b97641c70ff6481b91e5c09a84f" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.30.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "cb91a194a947817eeac636075c36780e" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "5139e8c5c43b17ad3b205351fbdf9acc" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "945275a89392ae8586b07ab6ef243057" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "a2cc19e92cfa9a3461b468c9b5c43fd6" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "928e2983a51a2b9448d25f9bf372acc5" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.31.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "c314569027504f03f1f1bc7dfe2d9649" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "21973fbd3d55a15d0bec2c22187f89a8" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.32.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "f4a2f3ac11b800e1a785306d3d6e4c09" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "65701d1def8b234d7c5b70f9604d8a9d" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.32.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "bc8496f3e09dd875463a44893a6a0f12" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "aa0465b4e7d7e66d88da7c8d175a436b" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.33.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "019c54b71de6936d32c4c7630fa1c682" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "7405fef66e60745665619fdd88e394ff" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.33.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "76a6c24b88c4721f214e1ce572d22196" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1f800ecc0901116d0614f9785e8be745" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "d60681f257cac7ab4c119e31923e7d4e" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.34.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "605af38d1de46b0e6f3b7601cb9ce273" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "90ae0a995e3761eb8084fecf4fe9f8a6" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.34.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "1e4b20b8cf5281b2cec7470327559956" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.35.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "0628853f1b0f249d0c99ea245bf1b02d" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "c2a9f22338360c1b8d3daf50934e7c31" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.35.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "7a5dc9d5cfdc781b15f29bfc9ba24ba9" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "2fbfc985aa3ba0f23965098e997caaf4" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.36.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "55d190e07fb37f0fa37af4d3b8e9e921" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "785074fa2659756360cc9b31d9325977" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.36.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e3c64009020284a002ae4aaf50494750" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.36.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "055fa478d2dbf81cc3ced6a47ab20c73" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.37.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "effd16464de2908676f2b5483675ad65" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "aaebd803cf478d8ffb0cfab4964b5abe" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.37.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "5717c4b8e048b2676dc2d242dedc5aad" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.37.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "989fe41fdaea4b41c01c38dc3dd42c42" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "3f7faa7612d04b7a3c99f872fbee21d5" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.38.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "5b230f18df49601ec948e8ce38aac649" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.38.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f90398d1e2b93007491236caccdc8b08" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.38.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "09995791efe429f4c2fc696c1a7c208a" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.39.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "ff13316a1e0a82466495b8e0c38f7e89" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "6b0658d94bab5354af835b29a7be59e0" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.39.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a0ec51171b6adfb34fe32052c82cf752" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.39.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "79d037f6768d3bd1cf8ffff43728f2f5" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.40.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "b163b3d7814b2d6ba81cd73808c6fdb8" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "1479e18636d33aca550146d789590365" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.40.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a081296116b4703ee22f6704d8a22b94" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.40.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "94ac620c9b351d28e790039f9da6bac5" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.41.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "fedd0c494d4f7923501fde38d321f7b4" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "070e4bffdd4cf4030b3e4c74d898439f" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.41.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "ecf2e3620b5ca6f901fc3c0e68f50353" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.41.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "2929dc1e91c7454abc36a9bded55eb83" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "71ba5e7f388b629d3b63b0ec83725440" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.42.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "6e2da3a673f30ecc13ec8063703a4121" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.42.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "17d1f4ed8b26ebbcd67c84b49219b548" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.42.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "40eedf2a31713e0970e7aa65287dbbec" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.43.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "c4e991a2f65757fd6b0c45780167aa8b" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "6d9140953cd3ac0f934008101206011b" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.43.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "281d865d1179e91c1ca06dd1b4ca2894" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.43.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b21eff725f9513a5ccc6a70487d5ac4c" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.44.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "57b0baea5ce26a48354835856787b898" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "57195cba5383dd4bcc53d1e0014ffeb0" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.44.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "46653a6dc93a1c325482f6db7fce7e0b" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.44.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "5412751ecf265e2439fd6d66ba460a91" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.45.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "0d4bb0c1d5ccaba2783fac54270249b8" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "5eb1a2ccccd164c777e06d4e1e037dd6" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.45.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "31616fd48f0632d4fe4095502ffede4c" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.45.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "a37784bbbf4a722e567300699f41ae8e" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "90d812751ed32508df3da71cb7de7fda" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.46.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "8094a6ab919904ef2857deeb8798a40f" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.46.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "f834fddd452f55df81695c3c16b8d4b3" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.46.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "c12f184b20d60374af2a92d3deb7a23e" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.47.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "e1891734ca2d5386f30fe717427ffed7" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "68d37336d31bd5ea515f9ce58a5887ce" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.47.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "0896fcd5530ca9510d41abec3e916da0" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.47.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c2ce6f93f4f9cac65447f5e433ce18d7" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.48.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "8febb38777fbfb56a590e968030fc3ff" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "e0358ef4d4132b46ab62a064f22385e2" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.48.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "2b2a9c34bc715c546a37fbec3d293362" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.48.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "76d438f416ef5e64fb7888952fffceda" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.49.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "5974263bffa5d9591e2fd27f1d80721f" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "e81222993066d9194285e327de8d557f" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.49.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f64b70baee4f1242b1c10f8789722096" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.49.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "fbaf9f6e807fe5e57f223adc582c5e1e" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "b6638d52b616d1eddfcc65a5a413f8c2" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.50.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "aa456c76d66b524369c51e475bb66bf8" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.50.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "dbcf0da5abc3287a42b2049d6392e1f0" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.50.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "e068db487ec2561d789b907c6a32c940" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.51.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "e1bc132bc1cfb27d0663c172b60587ee" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "a468e0d867546666c63b47c308f67334" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.51.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "19ca5fc8aa7b8aca37062cbaef76b638" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.51.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "8ae104e8420e8cab019012183da340ee" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.52.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "2b26e2aba7070769ea6ae1e85d2f4d21" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "c1e3499e9551fd87b9e6a4d26d8acdea" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.52.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "90159beff9d4d61950e4746e98dcfca0" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.52.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "3d61f51306df5ef973d6b1497834099e" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.53.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "19392c3121dee5cb8f41bfe993977ed8" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "8fa22319dfd5b456ed6b70e7c723c49a" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.53.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "35146444828dd5ec1c9eb71489d8f489" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.53.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "aead69344acdf746bc2f7d97a4f42d18" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "21d8863aeca009899dc1f0a9bfe2696c" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.54.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a50faf95a3cdd5e0535918ef468071d5" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.54.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1f27e7c3c3ea66f70e706308c6610196" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.54.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "b94d091f52353c5873918d31454cb4c3" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.55.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "147d7063643eb98cde306ff59142f107" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "d2ba937faa11ec52a033c52418d58475" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.55.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "c6915cae05677c408181d31c1bd1e622" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.55.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "69975ec8dbf4b363dc3b196afe4ae688" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.56.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "f11a350ff95430c19f6bb366e8b54122" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "e58c72e8558392d36d5f2e4394204dcf" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.56.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "7ae6dd8e728d0b008347828280d79d80" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.56.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "3e9cee798bb637f1501c436ba4d483d9" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.57.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "010357438083741ead9839651f725bd4" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "f86d31fdec9d8f0476080099c8dda8ab" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.57.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "ce3067d42cf6bef83eaa23fa7d4f7f11" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.57.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c5139bde0699b35bb588c8bad734c818" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "db9c49a2d2ec3b9dc6eb4351ab99d3b0" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.58.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "7aa97c791d016675d96efd04306068b0" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.58.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6b39a5a8f48d67ef306a70410ee26775" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.58.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "3cbda3a8674f794875435ad2050a95f3" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.59.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "9618378a815711e3d01f54bfad5d4322" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "66170b368faf02df9019bcae03a32127" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.59.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "c79efcbb69d9e5dd9d5843e0192e49b2" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.59.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "284805d3144bffc50c487ef361fce9fa" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.60.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "df23920619282523a0b183658e53fc4c" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "1affbc29eb99c6e5b57bc4a18d09bbb4" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.60.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "82be8662cd70bf3f22a6e4ebe001a1b5" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.60.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "213bed5c3c1c75e5065104a3249ffd79" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.61.mlp.down_proj.weight", "shape": [ 5120, 27648 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "6c9d7e2ee8b3fcc633559a1c71160802" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 566231040, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.weight", "shape": [ 55296, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 566231040, "byteOffset": 0 } ], "md5sum": "40ca240fbd828f636fe6361887f4f3a9" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.61.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "356b79c9417873073adb840aa44c8b25" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.61.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "9b86e7ddd2bf311993557233996e7a0e" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.62.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "38d0b7fc31df42bfa81ca0af57b13e7b" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.62.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "ee566b569ecac8bfe4ef1aa2ffc00857" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 2238464, "records": [ { "name": "model.layers.63.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 0 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 10240 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20480 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 34816 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 45056 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 55296 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 65536 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 79872 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 90112 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 100352 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 114688 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 124928 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 135168 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 149504 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 159744 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 169984 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 184320 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 194560 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 204800 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 219136 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 229376 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 239616 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 253952 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 264192 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 274432 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 288768 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 299008 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 309248 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 323584 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 333824 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 344064 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 358400 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 368640 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 378880 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 393216 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 403456 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 413696 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 428032 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 438272 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 448512 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 462848 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 473088 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 483328 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 497664 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 507904 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 518144 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 532480 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 542720 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 552960 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 567296 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 577536 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 587776 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 602112 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 612352 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 622592 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 636928 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 647168 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 657408 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 671744 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 681984 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 692224 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 706560 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 716800 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 727040 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 741376 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 751616 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 761856 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 776192 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 786432 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 796672 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 811008 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 821248 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 831488 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 845824 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 856064 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 866304 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 880640 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 890880 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 901120 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 915456 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 925696 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 935936 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 950272 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 960512 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 970752 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 985088 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 995328 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1005568 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1019904 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1030144 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1040384 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1054720 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1064960 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1075200 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1089536 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1099776 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1110016 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1124352 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1134592 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1144832 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1159168 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1169408 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1179648 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1193984 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1204224 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1214464 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1228800 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1239040 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1249280 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1263616 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1273856 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1284096 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1298432 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1308672 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1318912 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1333248 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1343488 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1353728 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1368064 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1378304 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1388544 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1402880 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1413120 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1423360 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1437696 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1447936 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1458176 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1472512 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1482752 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1492992 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1507328 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1517568 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1527808 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1542144 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1552384 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1562624 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1576960 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1587200 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1597440 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1611776 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1622016 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1632256 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1646592 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1656832 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1667072 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1681408 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1691648 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1701888 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1716224 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1726464 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1736704 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1751040 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1761280 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1771520 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1785856 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1796096 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1806336 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1820672 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1830912 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1841152 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1855488 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1865728 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1875968 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1890304 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1900544 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1910784 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1925120 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1935360 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1945600 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1959936 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1970176 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1980416 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1994752 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2004992 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2015232 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2029568 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2039808 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2050048 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2064384 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2074624 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2084864 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2099200 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2109440 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2119680 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2134016 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2144256 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2154496 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2168832 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2179072 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2189312 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2203648 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2213888 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 2224128 } ], "md5sum": "a3d9affbabb167395433a90aaa743ac2" } ] }