diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..dcbb686eec5204673a5d9e2cad3c858f5c3d13a1 --- /dev/null +++ b/config.json @@ -0,0 +1,28 @@ +{ + "_name_or_path": "/mnt/shared/checkpoints/pranali_bas_llama3_70b_finetune_ambiguity_mixed_0624_2112_hf", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 8192, + "model_type": "llama", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.40.1", + "use_cache": true, + "vocab_size": 128257 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c30ef4959ae49254e8704297093b2e0004413dcb --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.40.1" +} diff --git a/pytorch_model-00001-of-00062.bin b/pytorch_model-00001-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..b09a5a3b78743334da778b06757830f069774cf0 --- /dev/null +++ b/pytorch_model-00001-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0fcd3ec1fbe457b4d0145e33eef3030ed74822bfc995b686e50ed6263ab273c +size 4806707853 diff --git a/pytorch_model-00002-of-00062.bin b/pytorch_model-00002-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..0af1859843fb9774051db4a4c072e5434bff87ca --- /dev/null +++ b/pytorch_model-00002-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71ba621cae85d84dc794c4133f3f12b9f3620153fae03596d3de569987228a21 +size 4362145918 diff --git a/pytorch_model-00003-of-00062.bin b/pytorch_model-00003-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..6f12f6e457807ca130e44c75b3a4734a2b9082c6 --- /dev/null +++ b/pytorch_model-00003-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d25e3300e4e025febfd31a6c797cf2a5e9f279c1a954b8143d504b7b5f2544cc +size 4362145918 diff --git a/pytorch_model-00004-of-00062.bin b/pytorch_model-00004-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dc81c0f114e3dc39c72dd0d1638d3eb26461b55 --- /dev/null +++ b/pytorch_model-00004-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e611bd25a8cc16520a5ec7eefd97d5bb8879d97f5542272aff6e56f4831b9d +size 4966193130 diff --git a/pytorch_model-00005-of-00062.bin b/pytorch_model-00005-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..b6606969f220b5e4276e0573ea519d4c302e7a09 --- /dev/null +++ b/pytorch_model-00005-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47ff9006325d63b984068adf44bbb4b622da2b4f07b4d757cf7995e32817dfdd +size 4362145918 diff --git a/pytorch_model-00006-of-00062.bin b/pytorch_model-00006-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..669c58a4b003dc228c89cdc430f4a0a0b0e99444 --- /dev/null +++ b/pytorch_model-00006-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ca81a5b66e3b5ae398949da56893424b4b8dc8238b8271f4669e7a7d578733 +size 4362145918 diff --git a/pytorch_model-00007-of-00062.bin b/pytorch_model-00007-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d40328e5660c9669752b1fab0ab1c010b8feb14 --- /dev/null +++ b/pytorch_model-00007-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b85998ec9c4f9795a4ce68fffb1bcb6ad48bac19dbaca5afce326333941019aa +size 4966193130 diff --git a/pytorch_model-00008-of-00062.bin b/pytorch_model-00008-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..9a6c5251673736f7c8294b4b371db2f13cc49e3d --- /dev/null +++ b/pytorch_model-00008-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:074f179446f8c599c722a0e6a69c453c78345db496c49b3b822afeadee7783c7 +size 4362145918 diff --git a/pytorch_model-00009-of-00062.bin b/pytorch_model-00009-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..94e9087a75aa0fba3e7fff77c015f1b322dc7d60 --- /dev/null +++ b/pytorch_model-00009-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19557abfd55cb4874d466919e9ec444be1ebed3203261f9c1ecb256830adf094 +size 4362145918 diff --git a/pytorch_model-00010-of-00062.bin b/pytorch_model-00010-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..d797ba822d956d8da72593f58635b0c7426150e4 --- /dev/null +++ b/pytorch_model-00010-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8101ac006d79521107d3c974a4068635c9f7e4731b1355205195cc9a545abda8 +size 4966193130 diff --git a/pytorch_model-00011-of-00062.bin b/pytorch_model-00011-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c3a2ed20e335d2c8ca2e36f9c32c01357debe11 --- /dev/null +++ b/pytorch_model-00011-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cd99861e6c58f3f8be0becb4ad8a4c62dcac1274fc6670932fa6542b9133b2c +size 4362145982 diff --git a/pytorch_model-00012-of-00062.bin b/pytorch_model-00012-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..7a81a325728995485d082db4d6f20fce395a5781 --- /dev/null +++ b/pytorch_model-00012-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad1e37e9c564b9e9ba37314ec9382369cdb4de8a71990f7be32ccb754f97966 +size 4362145982 diff --git a/pytorch_model-00013-of-00062.bin b/pytorch_model-00013-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0b2d8c97d89e1e9fbaa5c7fa2f04ec5cecd8434 --- /dev/null +++ b/pytorch_model-00013-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df492ac9fe2f8bee6c5b789742dc7bead52595378dec55871c71b010002e129d +size 4966193130 diff --git a/pytorch_model-00014-of-00062.bin b/pytorch_model-00014-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..ec352f34f49aa71aaf94edd1c3cdb02c120c8347 --- /dev/null +++ b/pytorch_model-00014-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba9e34c82aa5b27177d20b1efc03c0ac53f17aadb9986a481d4f04cc38c24454 +size 4362145982 diff --git a/pytorch_model-00015-of-00062.bin b/pytorch_model-00015-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..67ea4323a88e04f7fe8e4705bfa2257bd73ce0b1 --- /dev/null +++ b/pytorch_model-00015-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c722f4c53b82ab2efb023003cee5dd011901c6b09891a673b184818ab4b8172 +size 4362145982 diff --git a/pytorch_model-00016-of-00062.bin b/pytorch_model-00016-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e442cebea03182fba5933593e868f835e3a61c8 --- /dev/null +++ b/pytorch_model-00016-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3ad5c97e1e4aafc2a8517251037a919eacdf36787f4d362ff19be4964908126 +size 4966193130 diff --git a/pytorch_model-00017-of-00062.bin b/pytorch_model-00017-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..765ab9ff143793641f8c7c32ae0de0f822d37867 --- /dev/null +++ b/pytorch_model-00017-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d843646c34e9929def5bcb13b607637a519c4a4282ab7ad6346fa20b28be2b74 +size 4362145982 diff --git a/pytorch_model-00018-of-00062.bin b/pytorch_model-00018-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..44b42f8692a56504bdd6fbfb997cf07d95bc5ff5 --- /dev/null +++ b/pytorch_model-00018-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2032e03a29c5e3b3877f8b216f39344d367a9c4bce61fd17741dc62929797309 +size 4362145982 diff --git a/pytorch_model-00019-of-00062.bin b/pytorch_model-00019-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4c94195e0c0151a7a8f74d78e1c6ac0bbdf56ac --- /dev/null +++ b/pytorch_model-00019-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ddcc46a424ded91986fcbef61a1344b5699448848c04bb8a9360519f351272 +size 4966193130 diff --git a/pytorch_model-00020-of-00062.bin b/pytorch_model-00020-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..af1d352ef492f1df85094d48991f053e01d474c6 --- /dev/null +++ b/pytorch_model-00020-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e173657d74bdc5d028a4b4a9f63836b63b42ca7db52d82e03bec89d99e600e9 +size 4362145982 diff --git a/pytorch_model-00021-of-00062.bin b/pytorch_model-00021-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..11400a4e1961421ef313ebe3f25685ba43b426cb --- /dev/null +++ b/pytorch_model-00021-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d3e9ceebb68afaac8af93fabd681271ef180ad22c80dd94479dae9e0b59a877 +size 4362145982 diff --git a/pytorch_model-00022-of-00062.bin b/pytorch_model-00022-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..21cb7fc66660f047c67e8a38624417fa28d079cc --- /dev/null +++ b/pytorch_model-00022-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bf47c6a6f946d60d3ac5dfac0df841732ae81f2cb1343bc3cadaeaa1aef68ad +size 4966193130 diff --git a/pytorch_model-00023-of-00062.bin b/pytorch_model-00023-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..e870823d39e5469a605d84603c1c05cfe77af342 --- /dev/null +++ b/pytorch_model-00023-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29d20f9eb4c898b48d2b29a244ec7545b0559e0cf03f9a230bf46ef822e592e5 +size 4362145982 diff --git a/pytorch_model-00024-of-00062.bin b/pytorch_model-00024-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..7f127f13c8e2ea37129c638a9b482a7fb25b9d8a --- /dev/null +++ b/pytorch_model-00024-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e473fbb33f85dcd6718eb453f8f0c6c30beba972399b22f1d11c188d8e2af694 +size 4362145982 diff --git a/pytorch_model-00025-of-00062.bin b/pytorch_model-00025-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..730247ca5caf3db5fcd1b83d45ffa09307ddcf65 --- /dev/null +++ b/pytorch_model-00025-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef3a42a4713358a8913f8a3d943de15eff91ce6f87033643941ccf60d6ae15d +size 4966193130 diff --git a/pytorch_model-00026-of-00062.bin b/pytorch_model-00026-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..c75117c0aa7e4f8c2d4dd8590cefe59561c0d2ed --- /dev/null +++ b/pytorch_model-00026-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c0a366442b0cf9c2fc10f8aeb6db72d514ba54dbc95c4717dfb10cd5fb0cec +size 4362145982 diff --git a/pytorch_model-00027-of-00062.bin b/pytorch_model-00027-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e7c6877757aa53f44380f9a6e59e68a61985fad --- /dev/null +++ b/pytorch_model-00027-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8edc567dddea60a61e9f4e8e25f39c652d9782ef7b8c5ecf9481f509c06ef92d +size 4362145982 diff --git a/pytorch_model-00028-of-00062.bin b/pytorch_model-00028-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..278290264df7c83dc30df29c947a3945142e83be --- /dev/null +++ b/pytorch_model-00028-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be47763bd41cc65129c6ca0a208a5a1e6250993da0cace0b0c3eecd548d8d467 +size 4966193130 diff --git a/pytorch_model-00029-of-00062.bin b/pytorch_model-00029-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..46eee59cf7b259b773b0507f7ea180f741f4ec8a --- /dev/null +++ b/pytorch_model-00029-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb72a246401a2166d83c0c4568ee772732ad737b99770cb640971e2df7ddb17 +size 4362145982 diff --git a/pytorch_model-00030-of-00062.bin b/pytorch_model-00030-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..7816e786580ebe10541402952fddabbf93772773 --- /dev/null +++ b/pytorch_model-00030-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa4e9591b85ba75fbeb1d343a90f83e43ed7b995c8335007921d15c1072c29bd +size 4362145982 diff --git a/pytorch_model-00031-of-00062.bin b/pytorch_model-00031-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..91d74f18349dd266c01f598c418d670a10f64407 --- /dev/null +++ b/pytorch_model-00031-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd35bb7828c79c3763ac0c9c706b4a93be9b9dcfaffc2216d90219df67adba4 +size 4966193130 diff --git a/pytorch_model-00032-of-00062.bin b/pytorch_model-00032-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3b267e4ed43b7ec8f364abbec2267f4e786efde --- /dev/null +++ b/pytorch_model-00032-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d1dfdf540b884638a5bd92a51952f6d82983ba3b88abc03d90b004019706aac +size 4362145982 diff --git a/pytorch_model-00033-of-00062.bin b/pytorch_model-00033-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..bfb7b679ca63a7fce7e050b7a066abf273b13a8f --- /dev/null +++ b/pytorch_model-00033-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c94edb78c22f8b18eabd1c4d8d8541355c7fd02846473d4eeb1660338130e35 +size 4362145982 diff --git a/pytorch_model-00034-of-00062.bin b/pytorch_model-00034-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..1258280baefd66494f4ba636ad468b73ecac0872 --- /dev/null +++ b/pytorch_model-00034-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c9e827a480e998826c2d9db22eb82d37ee5cda6c73f85c776df55c7d9c7c465 +size 4966193130 diff --git a/pytorch_model-00035-of-00062.bin b/pytorch_model-00035-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..6260133859c13b9a917ec12008601ec803d5a80b --- /dev/null +++ b/pytorch_model-00035-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31f86bcf1ff476facce0479b7a89d7c7a775b9c63117dbb6853b4cb8dc21918a +size 4362145982 diff --git a/pytorch_model-00036-of-00062.bin b/pytorch_model-00036-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..2bff85acb497dc3d219a18033e27f7e0d7183334 --- /dev/null +++ b/pytorch_model-00036-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46ae14db49b4a204d6708f6748fac722d5e6760b710597db0451ba6003aeca4d +size 4362145982 diff --git a/pytorch_model-00037-of-00062.bin b/pytorch_model-00037-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..8fa62d91f553b7480dd09d7cc2c115a0b710001b --- /dev/null +++ b/pytorch_model-00037-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a15ef6c56a343756d79843398305fdc5de6f9f007fcc9cacd21ea33117b86d4 +size 4966193130 diff --git a/pytorch_model-00038-of-00062.bin b/pytorch_model-00038-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..07693906df0eaee9ac6c1070fd2e6c2d7d7787fb --- /dev/null +++ b/pytorch_model-00038-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bea9def60874c9ab5d13fb07f4218ba020ebacb93479087c794fd08488bfe9f +size 4362145982 diff --git a/pytorch_model-00039-of-00062.bin b/pytorch_model-00039-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..8fececf67755e6eab37f341261bd7e04e246203a --- /dev/null +++ b/pytorch_model-00039-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c5c0750d397e1d726ac1facd34b8535e0b552e82a10a38d2ceba85e9aee7678 +size 4362145982 diff --git a/pytorch_model-00040-of-00062.bin b/pytorch_model-00040-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4d23c21d544aeb0956cc920b1500f0cbd0e569a --- /dev/null +++ b/pytorch_model-00040-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:422fc52de5912359247630a2023760de3bde5086d59cdc04536f3bf063dde2e7 +size 4966193130 diff --git a/pytorch_model-00041-of-00062.bin b/pytorch_model-00041-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..12760de5cf1e10d98d294ccccdaeb48bbf3b8ad0 --- /dev/null +++ b/pytorch_model-00041-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16849d60e50dc34589ab370f6dab86cce5bce0111496b542e4414136ddea6d48 +size 4362145982 diff --git a/pytorch_model-00042-of-00062.bin b/pytorch_model-00042-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..638313274c5d912bafb634c96f4e7a819ad2fd54 --- /dev/null +++ b/pytorch_model-00042-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a89f8167d83db6c3d7e5d0340096f52c4726d2f3c057ca9f3bc463273a38a48b +size 4362145982 diff --git a/pytorch_model-00043-of-00062.bin b/pytorch_model-00043-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..b58652059603291190e45e6587184e1c4e5cd48a --- /dev/null +++ b/pytorch_model-00043-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7db11a92b6d75204a9ae73bd315480fe0da80a46bf3bb8f9521988cdd7d8f699 +size 4966193130 diff --git a/pytorch_model-00044-of-00062.bin b/pytorch_model-00044-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..d22ec033613e83bd6e2ca318bc8f89806102a16f --- /dev/null +++ b/pytorch_model-00044-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f4d8b9af8552c46bfc92f13f699ed652e5609fa083d286d5e35afcf64ecdd34 +size 4362145982 diff --git a/pytorch_model-00045-of-00062.bin b/pytorch_model-00045-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..14fd668fc38c142013952307456a6dfdc7abf2e8 --- /dev/null +++ b/pytorch_model-00045-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:208bd84518f4232215b36b0f03d07242c3f9802ceb392385c7343f7a373aac33 +size 4362145982 diff --git a/pytorch_model-00046-of-00062.bin b/pytorch_model-00046-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fb1baa9d00e9053e9322660af1e1ab12f00b355 --- /dev/null +++ b/pytorch_model-00046-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10748981ad627117a6517bc308ed9d9326bf6e9e4bafc9e9f68589e6402bc0d8 +size 4966193130 diff --git a/pytorch_model-00047-of-00062.bin b/pytorch_model-00047-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..8740a6cae862e1ac2c5a9c2d24251f9c02f63c11 --- /dev/null +++ b/pytorch_model-00047-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1bcd263de685a4b9c14fe5aeef27a6bc8ad8d069a5198b13fd9d9c92956af9 +size 4362145982 diff --git a/pytorch_model-00048-of-00062.bin b/pytorch_model-00048-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..f05d63e894545b295d23ac0d37848fae299c8e54 --- /dev/null +++ b/pytorch_model-00048-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a5d6ff2fb6eb99df4cbb5ea6857a9bea1dd453d8a2fcd8e6d8a652dfe2fb009 +size 4362145982 diff --git a/pytorch_model-00049-of-00062.bin b/pytorch_model-00049-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..a46fc1c6c0d6cd716ef648c47175bbaf06de83d1 --- /dev/null +++ b/pytorch_model-00049-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af801a83acee901f9cffe2dc7c1cbb5f8afde35b52f7b87dd35ae28e77253f9f +size 4966193130 diff --git a/pytorch_model-00050-of-00062.bin b/pytorch_model-00050-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..0c46fef411e5e33b5a81a1ff8dfea435195eb056 --- /dev/null +++ b/pytorch_model-00050-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387e3f1d4d81d64e42d1ac1d4962a24c7812da5e68af3569f8e87d7a055b2578 +size 4362145982 diff --git a/pytorch_model-00051-of-00062.bin b/pytorch_model-00051-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..938299c9a39b5080413e2f4e97283e6946f10a6a --- /dev/null +++ b/pytorch_model-00051-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e678af303c4fe428a2053112411d0242b55a7bf71739954633fb86b1cf99b146 +size 4362145982 diff --git a/pytorch_model-00052-of-00062.bin b/pytorch_model-00052-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..d2d71d1f9855c2a5f0a7d9a1b08e5997474801ba --- /dev/null +++ b/pytorch_model-00052-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7498055072aa2e815d5c06a92151a9a0be25227dcb1453b8d22d9a3e6fde45c2 +size 4966193130 diff --git a/pytorch_model-00053-of-00062.bin b/pytorch_model-00053-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..c6825f24276f4776e7b9c4a45aaf937d1eb9129c --- /dev/null +++ b/pytorch_model-00053-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a2b5d5bec30bd1d207f43d010604f32825130e6ae26041a4725327dce25fd7b +size 4362145982 diff --git a/pytorch_model-00054-of-00062.bin b/pytorch_model-00054-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..f094aa5cb7c760f4b89ba53f7303642629cea4ec --- /dev/null +++ b/pytorch_model-00054-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10972f96553a1983a627bd7d517731285c2007f20a1ac5bc7f87c7b803ce1921 +size 4362145982 diff --git a/pytorch_model-00055-of-00062.bin b/pytorch_model-00055-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..77af2a072b6a15f016556b7c3d18919ad3da8f01 --- /dev/null +++ b/pytorch_model-00055-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f1f901ba1a2a8bdcad0c05ff848fcbf65d52a8ef92d285957e6a9f84649cedc +size 4966193130 diff --git a/pytorch_model-00056-of-00062.bin b/pytorch_model-00056-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..344045e0be0504aa1d7977b90becc0d438179a2e --- /dev/null +++ b/pytorch_model-00056-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0f8c3331459d013ddaa1de08c045c6b4d39116bc1244943ca1702d5e2683b90 +size 4362145982 diff --git a/pytorch_model-00057-of-00062.bin b/pytorch_model-00057-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..64c70a4d266735b08bc54b23647015d862e9c60e --- /dev/null +++ b/pytorch_model-00057-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d6a690d498d74895189c1919f858136efdca597ec99f46cbd0c806353dbe907 +size 4362145982 diff --git a/pytorch_model-00058-of-00062.bin b/pytorch_model-00058-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef0df2fb269f41579bb6956dadddccd04732463c --- /dev/null +++ b/pytorch_model-00058-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0531f99dd92c1dfc004040a382a619f6e904ec26c0bc73c0afad1cbf3d9388 +size 4966193130 diff --git a/pytorch_model-00059-of-00062.bin b/pytorch_model-00059-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..97e46413186d9deb7d25efeb091e70ea86396c01 --- /dev/null +++ b/pytorch_model-00059-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d668ec943ec88e7175add32bda94d974b32b515b808edefcf821b1a25364d13 +size 4362145982 diff --git a/pytorch_model-00060-of-00062.bin b/pytorch_model-00060-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..38d5f3a8124c1fda4b233f8727da82eb8210652a --- /dev/null +++ b/pytorch_model-00060-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6eb77016b49bab16ed85f99b44aa4a2201d02a8c8056b27b409ae82c6c7fb95 +size 4362145982 diff --git a/pytorch_model-00061-of-00062.bin b/pytorch_model-00061-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..0d9361a453e40fedafeb05822f37d98a75a9ab63 --- /dev/null +++ b/pytorch_model-00061-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8368f54ea48c5630b97b5a2baf8dc1507d9989eec46e9307c14749eb72a4a1 +size 4362245172 diff --git a/pytorch_model-00062-of-00062.bin b/pytorch_model-00062-of-00062.bin new file mode 100644 index 0000000000000000000000000000000000000000..c9f0b17a008598f6a47cbb7b96dad4c3d9e24e2a --- /dev/null +++ b/pytorch_model-00062-of-00062.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf597303d90669da581338f6a96290cd2e100904c6ecee0d6ebce5309ba357ee +size 4202726789 diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..ae61c60787c56a19d5bf9b5d04aa3f51bb56c765 --- /dev/null +++ b/pytorch_model.bin.index.json @@ -0,0 +1,730 @@ +{ + "metadata": { + "total_size": 282214891520 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00062-of-00062.bin", + "model.embed_tokens.weight": "pytorch_model-00001-of-00062.bin", + "model.layers.0.input_layernorm.weight": "pytorch_model-00002-of-00062.bin", + "model.layers.0.mlp.down_proj.weight": "pytorch_model-00002-of-00062.bin", + "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00002-of-00062.bin", + "model.layers.0.mlp.up_proj.weight": "pytorch_model-00002-of-00062.bin", + "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00002-of-00062.bin", + "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00062.bin", + "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00062.bin", + "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00062.bin", + "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00062.bin", + "model.layers.1.input_layernorm.weight": "pytorch_model-00003-of-00062.bin", + "model.layers.1.mlp.down_proj.weight": "pytorch_model-00003-of-00062.bin", + "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00002-of-00062.bin", + "model.layers.1.mlp.up_proj.weight": "pytorch_model-00003-of-00062.bin", + "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00003-of-00062.bin", + "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00002-of-00062.bin", + "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00002-of-00062.bin", + "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00002-of-00062.bin", + "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00002-of-00062.bin", + "model.layers.10.input_layernorm.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.10.mlp.down_proj.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00009-of-00062.bin", + "model.layers.10.mlp.up_proj.weight": "pytorch_model-00009-of-00062.bin", + "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00009-of-00062.bin", + "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00009-of-00062.bin", + "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00009-of-00062.bin", + "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00009-of-00062.bin", + "model.layers.11.input_layernorm.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.11.mlp.down_proj.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.11.mlp.up_proj.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.12.input_layernorm.weight": "pytorch_model-00011-of-00062.bin", + "model.layers.12.mlp.down_proj.weight": "pytorch_model-00011-of-00062.bin", + "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00011-of-00062.bin", + "model.layers.12.mlp.up_proj.weight": "pytorch_model-00011-of-00062.bin", + "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00011-of-00062.bin", + "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00010-of-00062.bin", + "model.layers.13.input_layernorm.weight": "pytorch_model-00012-of-00062.bin", + "model.layers.13.mlp.down_proj.weight": "pytorch_model-00012-of-00062.bin", + "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00011-of-00062.bin", + "model.layers.13.mlp.up_proj.weight": "pytorch_model-00012-of-00062.bin", + "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00012-of-00062.bin", + "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00011-of-00062.bin", + "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00011-of-00062.bin", + "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00011-of-00062.bin", + "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00011-of-00062.bin", + "model.layers.14.input_layernorm.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.14.mlp.down_proj.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00012-of-00062.bin", + "model.layers.14.mlp.up_proj.weight": "pytorch_model-00012-of-00062.bin", + "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00012-of-00062.bin", + "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00012-of-00062.bin", + "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00012-of-00062.bin", + "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00012-of-00062.bin", + "model.layers.15.input_layernorm.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.15.mlp.down_proj.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.15.mlp.up_proj.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.16.input_layernorm.weight": "pytorch_model-00014-of-00062.bin", + "model.layers.16.mlp.down_proj.weight": "pytorch_model-00014-of-00062.bin", + "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00014-of-00062.bin", + "model.layers.16.mlp.up_proj.weight": "pytorch_model-00014-of-00062.bin", + "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00014-of-00062.bin", + "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00013-of-00062.bin", + "model.layers.17.input_layernorm.weight": "pytorch_model-00015-of-00062.bin", + "model.layers.17.mlp.down_proj.weight": "pytorch_model-00015-of-00062.bin", + "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00014-of-00062.bin", + "model.layers.17.mlp.up_proj.weight": "pytorch_model-00015-of-00062.bin", + "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00015-of-00062.bin", + "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00014-of-00062.bin", + "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00014-of-00062.bin", + "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00014-of-00062.bin", + "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00014-of-00062.bin", + "model.layers.18.input_layernorm.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.18.mlp.down_proj.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00015-of-00062.bin", + "model.layers.18.mlp.up_proj.weight": "pytorch_model-00015-of-00062.bin", + "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00015-of-00062.bin", + "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00015-of-00062.bin", + "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00015-of-00062.bin", + "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00015-of-00062.bin", + "model.layers.19.input_layernorm.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.19.mlp.down_proj.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.19.mlp.up_proj.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.2.input_layernorm.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.2.mlp.down_proj.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00003-of-00062.bin", + "model.layers.2.mlp.up_proj.weight": "pytorch_model-00003-of-00062.bin", + "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00003-of-00062.bin", + "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00003-of-00062.bin", + "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00003-of-00062.bin", + "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00003-of-00062.bin", + "model.layers.20.input_layernorm.weight": "pytorch_model-00017-of-00062.bin", + "model.layers.20.mlp.down_proj.weight": "pytorch_model-00017-of-00062.bin", + "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00017-of-00062.bin", + "model.layers.20.mlp.up_proj.weight": "pytorch_model-00017-of-00062.bin", + "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00017-of-00062.bin", + "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00016-of-00062.bin", + "model.layers.21.input_layernorm.weight": "pytorch_model-00018-of-00062.bin", + "model.layers.21.mlp.down_proj.weight": "pytorch_model-00018-of-00062.bin", + "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00017-of-00062.bin", + "model.layers.21.mlp.up_proj.weight": "pytorch_model-00018-of-00062.bin", + "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00018-of-00062.bin", + "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00017-of-00062.bin", + "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00017-of-00062.bin", + "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00017-of-00062.bin", + "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00017-of-00062.bin", + "model.layers.22.input_layernorm.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.22.mlp.down_proj.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00018-of-00062.bin", + "model.layers.22.mlp.up_proj.weight": "pytorch_model-00018-of-00062.bin", + "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00018-of-00062.bin", + "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00018-of-00062.bin", + "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00018-of-00062.bin", + "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00018-of-00062.bin", + "model.layers.23.input_layernorm.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.23.mlp.down_proj.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.23.mlp.up_proj.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.24.input_layernorm.weight": "pytorch_model-00020-of-00062.bin", + "model.layers.24.mlp.down_proj.weight": "pytorch_model-00020-of-00062.bin", + "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00020-of-00062.bin", + "model.layers.24.mlp.up_proj.weight": "pytorch_model-00020-of-00062.bin", + "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00020-of-00062.bin", + "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00019-of-00062.bin", + "model.layers.25.input_layernorm.weight": "pytorch_model-00021-of-00062.bin", + "model.layers.25.mlp.down_proj.weight": "pytorch_model-00021-of-00062.bin", + "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00020-of-00062.bin", + "model.layers.25.mlp.up_proj.weight": "pytorch_model-00021-of-00062.bin", + "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00021-of-00062.bin", + "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00020-of-00062.bin", + "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00020-of-00062.bin", + "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00020-of-00062.bin", + "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00020-of-00062.bin", + "model.layers.26.input_layernorm.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.26.mlp.down_proj.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00021-of-00062.bin", + "model.layers.26.mlp.up_proj.weight": "pytorch_model-00021-of-00062.bin", + "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00021-of-00062.bin", + "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00021-of-00062.bin", + "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00021-of-00062.bin", + "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00021-of-00062.bin", + "model.layers.27.input_layernorm.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.27.mlp.down_proj.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.27.mlp.up_proj.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.28.input_layernorm.weight": "pytorch_model-00023-of-00062.bin", + "model.layers.28.mlp.down_proj.weight": "pytorch_model-00023-of-00062.bin", + "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00023-of-00062.bin", + "model.layers.28.mlp.up_proj.weight": "pytorch_model-00023-of-00062.bin", + "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00023-of-00062.bin", + "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00022-of-00062.bin", + "model.layers.29.input_layernorm.weight": "pytorch_model-00024-of-00062.bin", + "model.layers.29.mlp.down_proj.weight": "pytorch_model-00024-of-00062.bin", + "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00023-of-00062.bin", + "model.layers.29.mlp.up_proj.weight": "pytorch_model-00024-of-00062.bin", + "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00024-of-00062.bin", + "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00023-of-00062.bin", + "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00023-of-00062.bin", + "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00023-of-00062.bin", + "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00023-of-00062.bin", + "model.layers.3.input_layernorm.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.3.mlp.down_proj.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.3.mlp.up_proj.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.30.input_layernorm.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.30.mlp.down_proj.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00024-of-00062.bin", + "model.layers.30.mlp.up_proj.weight": "pytorch_model-00024-of-00062.bin", + "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00024-of-00062.bin", + "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00024-of-00062.bin", + "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00024-of-00062.bin", + "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00024-of-00062.bin", + "model.layers.31.input_layernorm.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.31.mlp.down_proj.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.31.mlp.up_proj.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.32.input_layernorm.weight": "pytorch_model-00026-of-00062.bin", + "model.layers.32.mlp.down_proj.weight": "pytorch_model-00026-of-00062.bin", + "model.layers.32.mlp.gate_proj.weight": "pytorch_model-00026-of-00062.bin", + "model.layers.32.mlp.up_proj.weight": "pytorch_model-00026-of-00062.bin", + "model.layers.32.post_attention_layernorm.weight": "pytorch_model-00026-of-00062.bin", + "model.layers.32.self_attn.k_proj.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.32.self_attn.o_proj.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.32.self_attn.q_proj.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.32.self_attn.v_proj.weight": "pytorch_model-00025-of-00062.bin", + "model.layers.33.input_layernorm.weight": "pytorch_model-00027-of-00062.bin", + "model.layers.33.mlp.down_proj.weight": "pytorch_model-00027-of-00062.bin", + "model.layers.33.mlp.gate_proj.weight": "pytorch_model-00026-of-00062.bin", + "model.layers.33.mlp.up_proj.weight": "pytorch_model-00027-of-00062.bin", + "model.layers.33.post_attention_layernorm.weight": "pytorch_model-00027-of-00062.bin", + "model.layers.33.self_attn.k_proj.weight": "pytorch_model-00026-of-00062.bin", + "model.layers.33.self_attn.o_proj.weight": "pytorch_model-00026-of-00062.bin", + "model.layers.33.self_attn.q_proj.weight": "pytorch_model-00026-of-00062.bin", + "model.layers.33.self_attn.v_proj.weight": "pytorch_model-00026-of-00062.bin", + "model.layers.34.input_layernorm.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.34.mlp.down_proj.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.34.mlp.gate_proj.weight": "pytorch_model-00027-of-00062.bin", + "model.layers.34.mlp.up_proj.weight": "pytorch_model-00027-of-00062.bin", + "model.layers.34.post_attention_layernorm.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.34.self_attn.k_proj.weight": "pytorch_model-00027-of-00062.bin", + "model.layers.34.self_attn.o_proj.weight": "pytorch_model-00027-of-00062.bin", + "model.layers.34.self_attn.q_proj.weight": "pytorch_model-00027-of-00062.bin", + "model.layers.34.self_attn.v_proj.weight": "pytorch_model-00027-of-00062.bin", + "model.layers.35.input_layernorm.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.35.mlp.down_proj.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.35.mlp.gate_proj.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.35.mlp.up_proj.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.35.post_attention_layernorm.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.35.self_attn.k_proj.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.35.self_attn.o_proj.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.35.self_attn.q_proj.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.35.self_attn.v_proj.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.36.input_layernorm.weight": "pytorch_model-00029-of-00062.bin", + "model.layers.36.mlp.down_proj.weight": "pytorch_model-00029-of-00062.bin", + "model.layers.36.mlp.gate_proj.weight": "pytorch_model-00029-of-00062.bin", + "model.layers.36.mlp.up_proj.weight": "pytorch_model-00029-of-00062.bin", + "model.layers.36.post_attention_layernorm.weight": "pytorch_model-00029-of-00062.bin", + "model.layers.36.self_attn.k_proj.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.36.self_attn.o_proj.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.36.self_attn.q_proj.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.36.self_attn.v_proj.weight": "pytorch_model-00028-of-00062.bin", + "model.layers.37.input_layernorm.weight": "pytorch_model-00030-of-00062.bin", + "model.layers.37.mlp.down_proj.weight": "pytorch_model-00030-of-00062.bin", + "model.layers.37.mlp.gate_proj.weight": "pytorch_model-00029-of-00062.bin", + "model.layers.37.mlp.up_proj.weight": "pytorch_model-00030-of-00062.bin", + "model.layers.37.post_attention_layernorm.weight": "pytorch_model-00030-of-00062.bin", + "model.layers.37.self_attn.k_proj.weight": "pytorch_model-00029-of-00062.bin", + "model.layers.37.self_attn.o_proj.weight": "pytorch_model-00029-of-00062.bin", + "model.layers.37.self_attn.q_proj.weight": "pytorch_model-00029-of-00062.bin", + "model.layers.37.self_attn.v_proj.weight": "pytorch_model-00029-of-00062.bin", + "model.layers.38.input_layernorm.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.38.mlp.down_proj.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.38.mlp.gate_proj.weight": "pytorch_model-00030-of-00062.bin", + "model.layers.38.mlp.up_proj.weight": "pytorch_model-00030-of-00062.bin", + "model.layers.38.post_attention_layernorm.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.38.self_attn.k_proj.weight": "pytorch_model-00030-of-00062.bin", + "model.layers.38.self_attn.o_proj.weight": "pytorch_model-00030-of-00062.bin", + "model.layers.38.self_attn.q_proj.weight": "pytorch_model-00030-of-00062.bin", + "model.layers.38.self_attn.v_proj.weight": "pytorch_model-00030-of-00062.bin", + "model.layers.39.input_layernorm.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.39.mlp.down_proj.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.39.mlp.gate_proj.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.39.mlp.up_proj.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.39.post_attention_layernorm.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.39.self_attn.k_proj.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.39.self_attn.o_proj.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.39.self_attn.q_proj.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.39.self_attn.v_proj.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.4.input_layernorm.weight": "pytorch_model-00005-of-00062.bin", + "model.layers.4.mlp.down_proj.weight": "pytorch_model-00005-of-00062.bin", + "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00005-of-00062.bin", + "model.layers.4.mlp.up_proj.weight": "pytorch_model-00005-of-00062.bin", + "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00005-of-00062.bin", + "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00004-of-00062.bin", + "model.layers.40.input_layernorm.weight": "pytorch_model-00032-of-00062.bin", + "model.layers.40.mlp.down_proj.weight": "pytorch_model-00032-of-00062.bin", + "model.layers.40.mlp.gate_proj.weight": "pytorch_model-00032-of-00062.bin", + "model.layers.40.mlp.up_proj.weight": "pytorch_model-00032-of-00062.bin", + "model.layers.40.post_attention_layernorm.weight": "pytorch_model-00032-of-00062.bin", + "model.layers.40.self_attn.k_proj.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.40.self_attn.o_proj.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.40.self_attn.q_proj.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.40.self_attn.v_proj.weight": "pytorch_model-00031-of-00062.bin", + "model.layers.41.input_layernorm.weight": "pytorch_model-00033-of-00062.bin", + "model.layers.41.mlp.down_proj.weight": "pytorch_model-00033-of-00062.bin", + "model.layers.41.mlp.gate_proj.weight": "pytorch_model-00032-of-00062.bin", + "model.layers.41.mlp.up_proj.weight": "pytorch_model-00033-of-00062.bin", + "model.layers.41.post_attention_layernorm.weight": "pytorch_model-00033-of-00062.bin", + "model.layers.41.self_attn.k_proj.weight": "pytorch_model-00032-of-00062.bin", + "model.layers.41.self_attn.o_proj.weight": "pytorch_model-00032-of-00062.bin", + "model.layers.41.self_attn.q_proj.weight": "pytorch_model-00032-of-00062.bin", + "model.layers.41.self_attn.v_proj.weight": "pytorch_model-00032-of-00062.bin", + "model.layers.42.input_layernorm.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.42.mlp.down_proj.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.42.mlp.gate_proj.weight": "pytorch_model-00033-of-00062.bin", + "model.layers.42.mlp.up_proj.weight": "pytorch_model-00033-of-00062.bin", + "model.layers.42.post_attention_layernorm.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.42.self_attn.k_proj.weight": "pytorch_model-00033-of-00062.bin", + "model.layers.42.self_attn.o_proj.weight": "pytorch_model-00033-of-00062.bin", + "model.layers.42.self_attn.q_proj.weight": "pytorch_model-00033-of-00062.bin", + "model.layers.42.self_attn.v_proj.weight": "pytorch_model-00033-of-00062.bin", + "model.layers.43.input_layernorm.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.43.mlp.down_proj.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.43.mlp.gate_proj.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.43.mlp.up_proj.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.43.post_attention_layernorm.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.43.self_attn.k_proj.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.43.self_attn.o_proj.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.43.self_attn.q_proj.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.43.self_attn.v_proj.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.44.input_layernorm.weight": "pytorch_model-00035-of-00062.bin", + "model.layers.44.mlp.down_proj.weight": "pytorch_model-00035-of-00062.bin", + "model.layers.44.mlp.gate_proj.weight": "pytorch_model-00035-of-00062.bin", + "model.layers.44.mlp.up_proj.weight": "pytorch_model-00035-of-00062.bin", + "model.layers.44.post_attention_layernorm.weight": "pytorch_model-00035-of-00062.bin", + "model.layers.44.self_attn.k_proj.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.44.self_attn.o_proj.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.44.self_attn.q_proj.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.44.self_attn.v_proj.weight": "pytorch_model-00034-of-00062.bin", + "model.layers.45.input_layernorm.weight": "pytorch_model-00036-of-00062.bin", + "model.layers.45.mlp.down_proj.weight": "pytorch_model-00036-of-00062.bin", + "model.layers.45.mlp.gate_proj.weight": "pytorch_model-00035-of-00062.bin", + "model.layers.45.mlp.up_proj.weight": "pytorch_model-00036-of-00062.bin", + "model.layers.45.post_attention_layernorm.weight": "pytorch_model-00036-of-00062.bin", + "model.layers.45.self_attn.k_proj.weight": "pytorch_model-00035-of-00062.bin", + "model.layers.45.self_attn.o_proj.weight": "pytorch_model-00035-of-00062.bin", + "model.layers.45.self_attn.q_proj.weight": "pytorch_model-00035-of-00062.bin", + "model.layers.45.self_attn.v_proj.weight": "pytorch_model-00035-of-00062.bin", + "model.layers.46.input_layernorm.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.46.mlp.down_proj.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.46.mlp.gate_proj.weight": "pytorch_model-00036-of-00062.bin", + "model.layers.46.mlp.up_proj.weight": "pytorch_model-00036-of-00062.bin", + "model.layers.46.post_attention_layernorm.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.46.self_attn.k_proj.weight": "pytorch_model-00036-of-00062.bin", + "model.layers.46.self_attn.o_proj.weight": "pytorch_model-00036-of-00062.bin", + "model.layers.46.self_attn.q_proj.weight": "pytorch_model-00036-of-00062.bin", + "model.layers.46.self_attn.v_proj.weight": "pytorch_model-00036-of-00062.bin", + "model.layers.47.input_layernorm.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.47.mlp.down_proj.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.47.mlp.gate_proj.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.47.mlp.up_proj.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.47.post_attention_layernorm.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.47.self_attn.k_proj.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.47.self_attn.o_proj.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.47.self_attn.q_proj.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.47.self_attn.v_proj.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.48.input_layernorm.weight": "pytorch_model-00038-of-00062.bin", + "model.layers.48.mlp.down_proj.weight": "pytorch_model-00038-of-00062.bin", + "model.layers.48.mlp.gate_proj.weight": "pytorch_model-00038-of-00062.bin", + "model.layers.48.mlp.up_proj.weight": "pytorch_model-00038-of-00062.bin", + "model.layers.48.post_attention_layernorm.weight": "pytorch_model-00038-of-00062.bin", + "model.layers.48.self_attn.k_proj.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.48.self_attn.o_proj.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.48.self_attn.q_proj.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.48.self_attn.v_proj.weight": "pytorch_model-00037-of-00062.bin", + "model.layers.49.input_layernorm.weight": "pytorch_model-00039-of-00062.bin", + "model.layers.49.mlp.down_proj.weight": "pytorch_model-00039-of-00062.bin", + "model.layers.49.mlp.gate_proj.weight": "pytorch_model-00038-of-00062.bin", + "model.layers.49.mlp.up_proj.weight": "pytorch_model-00039-of-00062.bin", + "model.layers.49.post_attention_layernorm.weight": "pytorch_model-00039-of-00062.bin", + "model.layers.49.self_attn.k_proj.weight": "pytorch_model-00038-of-00062.bin", + "model.layers.49.self_attn.o_proj.weight": "pytorch_model-00038-of-00062.bin", + "model.layers.49.self_attn.q_proj.weight": "pytorch_model-00038-of-00062.bin", + "model.layers.49.self_attn.v_proj.weight": "pytorch_model-00038-of-00062.bin", + "model.layers.5.input_layernorm.weight": "pytorch_model-00006-of-00062.bin", + "model.layers.5.mlp.down_proj.weight": "pytorch_model-00006-of-00062.bin", + "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00005-of-00062.bin", + "model.layers.5.mlp.up_proj.weight": "pytorch_model-00006-of-00062.bin", + "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00006-of-00062.bin", + "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00005-of-00062.bin", + "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00005-of-00062.bin", + "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00005-of-00062.bin", + "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00005-of-00062.bin", + "model.layers.50.input_layernorm.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.50.mlp.down_proj.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.50.mlp.gate_proj.weight": "pytorch_model-00039-of-00062.bin", + "model.layers.50.mlp.up_proj.weight": "pytorch_model-00039-of-00062.bin", + "model.layers.50.post_attention_layernorm.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.50.self_attn.k_proj.weight": "pytorch_model-00039-of-00062.bin", + "model.layers.50.self_attn.o_proj.weight": "pytorch_model-00039-of-00062.bin", + "model.layers.50.self_attn.q_proj.weight": "pytorch_model-00039-of-00062.bin", + "model.layers.50.self_attn.v_proj.weight": "pytorch_model-00039-of-00062.bin", + "model.layers.51.input_layernorm.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.51.mlp.down_proj.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.51.mlp.gate_proj.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.51.mlp.up_proj.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.51.post_attention_layernorm.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.51.self_attn.k_proj.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.51.self_attn.o_proj.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.51.self_attn.q_proj.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.51.self_attn.v_proj.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.52.input_layernorm.weight": "pytorch_model-00041-of-00062.bin", + "model.layers.52.mlp.down_proj.weight": "pytorch_model-00041-of-00062.bin", + "model.layers.52.mlp.gate_proj.weight": "pytorch_model-00041-of-00062.bin", + "model.layers.52.mlp.up_proj.weight": "pytorch_model-00041-of-00062.bin", + "model.layers.52.post_attention_layernorm.weight": "pytorch_model-00041-of-00062.bin", + "model.layers.52.self_attn.k_proj.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.52.self_attn.o_proj.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.52.self_attn.q_proj.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.52.self_attn.v_proj.weight": "pytorch_model-00040-of-00062.bin", + "model.layers.53.input_layernorm.weight": "pytorch_model-00042-of-00062.bin", + "model.layers.53.mlp.down_proj.weight": "pytorch_model-00042-of-00062.bin", + "model.layers.53.mlp.gate_proj.weight": "pytorch_model-00041-of-00062.bin", + "model.layers.53.mlp.up_proj.weight": "pytorch_model-00042-of-00062.bin", + "model.layers.53.post_attention_layernorm.weight": "pytorch_model-00042-of-00062.bin", + "model.layers.53.self_attn.k_proj.weight": "pytorch_model-00041-of-00062.bin", + "model.layers.53.self_attn.o_proj.weight": "pytorch_model-00041-of-00062.bin", + "model.layers.53.self_attn.q_proj.weight": "pytorch_model-00041-of-00062.bin", + "model.layers.53.self_attn.v_proj.weight": "pytorch_model-00041-of-00062.bin", + "model.layers.54.input_layernorm.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.54.mlp.down_proj.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.54.mlp.gate_proj.weight": "pytorch_model-00042-of-00062.bin", + "model.layers.54.mlp.up_proj.weight": "pytorch_model-00042-of-00062.bin", + "model.layers.54.post_attention_layernorm.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.54.self_attn.k_proj.weight": "pytorch_model-00042-of-00062.bin", + "model.layers.54.self_attn.o_proj.weight": "pytorch_model-00042-of-00062.bin", + "model.layers.54.self_attn.q_proj.weight": "pytorch_model-00042-of-00062.bin", + "model.layers.54.self_attn.v_proj.weight": "pytorch_model-00042-of-00062.bin", + "model.layers.55.input_layernorm.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.55.mlp.down_proj.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.55.mlp.gate_proj.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.55.mlp.up_proj.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.55.post_attention_layernorm.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.55.self_attn.k_proj.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.55.self_attn.o_proj.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.55.self_attn.q_proj.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.55.self_attn.v_proj.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.56.input_layernorm.weight": "pytorch_model-00044-of-00062.bin", + "model.layers.56.mlp.down_proj.weight": "pytorch_model-00044-of-00062.bin", + "model.layers.56.mlp.gate_proj.weight": "pytorch_model-00044-of-00062.bin", + "model.layers.56.mlp.up_proj.weight": "pytorch_model-00044-of-00062.bin", + "model.layers.56.post_attention_layernorm.weight": "pytorch_model-00044-of-00062.bin", + "model.layers.56.self_attn.k_proj.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.56.self_attn.o_proj.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.56.self_attn.q_proj.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.56.self_attn.v_proj.weight": "pytorch_model-00043-of-00062.bin", + "model.layers.57.input_layernorm.weight": "pytorch_model-00045-of-00062.bin", + "model.layers.57.mlp.down_proj.weight": "pytorch_model-00045-of-00062.bin", + "model.layers.57.mlp.gate_proj.weight": "pytorch_model-00044-of-00062.bin", + "model.layers.57.mlp.up_proj.weight": "pytorch_model-00045-of-00062.bin", + "model.layers.57.post_attention_layernorm.weight": "pytorch_model-00045-of-00062.bin", + "model.layers.57.self_attn.k_proj.weight": "pytorch_model-00044-of-00062.bin", + "model.layers.57.self_attn.o_proj.weight": "pytorch_model-00044-of-00062.bin", + "model.layers.57.self_attn.q_proj.weight": "pytorch_model-00044-of-00062.bin", + "model.layers.57.self_attn.v_proj.weight": "pytorch_model-00044-of-00062.bin", + "model.layers.58.input_layernorm.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.58.mlp.down_proj.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.58.mlp.gate_proj.weight": "pytorch_model-00045-of-00062.bin", + "model.layers.58.mlp.up_proj.weight": "pytorch_model-00045-of-00062.bin", + "model.layers.58.post_attention_layernorm.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.58.self_attn.k_proj.weight": "pytorch_model-00045-of-00062.bin", + "model.layers.58.self_attn.o_proj.weight": "pytorch_model-00045-of-00062.bin", + "model.layers.58.self_attn.q_proj.weight": "pytorch_model-00045-of-00062.bin", + "model.layers.58.self_attn.v_proj.weight": "pytorch_model-00045-of-00062.bin", + "model.layers.59.input_layernorm.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.59.mlp.down_proj.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.59.mlp.gate_proj.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.59.mlp.up_proj.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.59.post_attention_layernorm.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.59.self_attn.k_proj.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.59.self_attn.o_proj.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.59.self_attn.q_proj.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.59.self_attn.v_proj.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.6.input_layernorm.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.6.mlp.down_proj.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00006-of-00062.bin", + "model.layers.6.mlp.up_proj.weight": "pytorch_model-00006-of-00062.bin", + "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00006-of-00062.bin", + "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00006-of-00062.bin", + "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00006-of-00062.bin", + "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00006-of-00062.bin", + "model.layers.60.input_layernorm.weight": "pytorch_model-00047-of-00062.bin", + "model.layers.60.mlp.down_proj.weight": "pytorch_model-00047-of-00062.bin", + "model.layers.60.mlp.gate_proj.weight": "pytorch_model-00047-of-00062.bin", + "model.layers.60.mlp.up_proj.weight": "pytorch_model-00047-of-00062.bin", + "model.layers.60.post_attention_layernorm.weight": "pytorch_model-00047-of-00062.bin", + "model.layers.60.self_attn.k_proj.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.60.self_attn.o_proj.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.60.self_attn.q_proj.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.60.self_attn.v_proj.weight": "pytorch_model-00046-of-00062.bin", + "model.layers.61.input_layernorm.weight": "pytorch_model-00048-of-00062.bin", + "model.layers.61.mlp.down_proj.weight": "pytorch_model-00048-of-00062.bin", + "model.layers.61.mlp.gate_proj.weight": "pytorch_model-00047-of-00062.bin", + "model.layers.61.mlp.up_proj.weight": "pytorch_model-00048-of-00062.bin", + "model.layers.61.post_attention_layernorm.weight": "pytorch_model-00048-of-00062.bin", + "model.layers.61.self_attn.k_proj.weight": "pytorch_model-00047-of-00062.bin", + "model.layers.61.self_attn.o_proj.weight": "pytorch_model-00047-of-00062.bin", + "model.layers.61.self_attn.q_proj.weight": "pytorch_model-00047-of-00062.bin", + "model.layers.61.self_attn.v_proj.weight": "pytorch_model-00047-of-00062.bin", + "model.layers.62.input_layernorm.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.62.mlp.down_proj.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.62.mlp.gate_proj.weight": "pytorch_model-00048-of-00062.bin", + "model.layers.62.mlp.up_proj.weight": "pytorch_model-00048-of-00062.bin", + "model.layers.62.post_attention_layernorm.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.62.self_attn.k_proj.weight": "pytorch_model-00048-of-00062.bin", + "model.layers.62.self_attn.o_proj.weight": "pytorch_model-00048-of-00062.bin", + "model.layers.62.self_attn.q_proj.weight": "pytorch_model-00048-of-00062.bin", + "model.layers.62.self_attn.v_proj.weight": "pytorch_model-00048-of-00062.bin", + "model.layers.63.input_layernorm.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.63.mlp.down_proj.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.63.mlp.gate_proj.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.63.mlp.up_proj.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.63.post_attention_layernorm.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.63.self_attn.k_proj.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.63.self_attn.o_proj.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.63.self_attn.q_proj.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.63.self_attn.v_proj.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.64.input_layernorm.weight": "pytorch_model-00050-of-00062.bin", + "model.layers.64.mlp.down_proj.weight": "pytorch_model-00050-of-00062.bin", + "model.layers.64.mlp.gate_proj.weight": "pytorch_model-00050-of-00062.bin", + "model.layers.64.mlp.up_proj.weight": "pytorch_model-00050-of-00062.bin", + "model.layers.64.post_attention_layernorm.weight": "pytorch_model-00050-of-00062.bin", + "model.layers.64.self_attn.k_proj.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.64.self_attn.o_proj.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.64.self_attn.q_proj.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.64.self_attn.v_proj.weight": "pytorch_model-00049-of-00062.bin", + "model.layers.65.input_layernorm.weight": "pytorch_model-00051-of-00062.bin", + "model.layers.65.mlp.down_proj.weight": "pytorch_model-00051-of-00062.bin", + "model.layers.65.mlp.gate_proj.weight": "pytorch_model-00050-of-00062.bin", + "model.layers.65.mlp.up_proj.weight": "pytorch_model-00051-of-00062.bin", + "model.layers.65.post_attention_layernorm.weight": "pytorch_model-00051-of-00062.bin", + "model.layers.65.self_attn.k_proj.weight": "pytorch_model-00050-of-00062.bin", + "model.layers.65.self_attn.o_proj.weight": "pytorch_model-00050-of-00062.bin", + "model.layers.65.self_attn.q_proj.weight": "pytorch_model-00050-of-00062.bin", + "model.layers.65.self_attn.v_proj.weight": "pytorch_model-00050-of-00062.bin", + "model.layers.66.input_layernorm.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.66.mlp.down_proj.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.66.mlp.gate_proj.weight": "pytorch_model-00051-of-00062.bin", + "model.layers.66.mlp.up_proj.weight": "pytorch_model-00051-of-00062.bin", + "model.layers.66.post_attention_layernorm.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.66.self_attn.k_proj.weight": "pytorch_model-00051-of-00062.bin", + "model.layers.66.self_attn.o_proj.weight": "pytorch_model-00051-of-00062.bin", + "model.layers.66.self_attn.q_proj.weight": "pytorch_model-00051-of-00062.bin", + "model.layers.66.self_attn.v_proj.weight": "pytorch_model-00051-of-00062.bin", + "model.layers.67.input_layernorm.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.67.mlp.down_proj.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.67.mlp.gate_proj.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.67.mlp.up_proj.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.67.post_attention_layernorm.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.67.self_attn.k_proj.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.67.self_attn.o_proj.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.67.self_attn.q_proj.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.67.self_attn.v_proj.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.68.input_layernorm.weight": "pytorch_model-00053-of-00062.bin", + "model.layers.68.mlp.down_proj.weight": "pytorch_model-00053-of-00062.bin", + "model.layers.68.mlp.gate_proj.weight": "pytorch_model-00053-of-00062.bin", + "model.layers.68.mlp.up_proj.weight": "pytorch_model-00053-of-00062.bin", + "model.layers.68.post_attention_layernorm.weight": "pytorch_model-00053-of-00062.bin", + "model.layers.68.self_attn.k_proj.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.68.self_attn.o_proj.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.68.self_attn.q_proj.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.68.self_attn.v_proj.weight": "pytorch_model-00052-of-00062.bin", + "model.layers.69.input_layernorm.weight": "pytorch_model-00054-of-00062.bin", + "model.layers.69.mlp.down_proj.weight": "pytorch_model-00054-of-00062.bin", + "model.layers.69.mlp.gate_proj.weight": "pytorch_model-00053-of-00062.bin", + "model.layers.69.mlp.up_proj.weight": "pytorch_model-00054-of-00062.bin", + "model.layers.69.post_attention_layernorm.weight": "pytorch_model-00054-of-00062.bin", + "model.layers.69.self_attn.k_proj.weight": "pytorch_model-00053-of-00062.bin", + "model.layers.69.self_attn.o_proj.weight": "pytorch_model-00053-of-00062.bin", + "model.layers.69.self_attn.q_proj.weight": "pytorch_model-00053-of-00062.bin", + "model.layers.69.self_attn.v_proj.weight": "pytorch_model-00053-of-00062.bin", + "model.layers.7.input_layernorm.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.7.mlp.down_proj.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.7.mlp.up_proj.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.70.input_layernorm.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.70.mlp.down_proj.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.70.mlp.gate_proj.weight": "pytorch_model-00054-of-00062.bin", + "model.layers.70.mlp.up_proj.weight": "pytorch_model-00054-of-00062.bin", + "model.layers.70.post_attention_layernorm.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.70.self_attn.k_proj.weight": "pytorch_model-00054-of-00062.bin", + "model.layers.70.self_attn.o_proj.weight": "pytorch_model-00054-of-00062.bin", + "model.layers.70.self_attn.q_proj.weight": "pytorch_model-00054-of-00062.bin", + "model.layers.70.self_attn.v_proj.weight": "pytorch_model-00054-of-00062.bin", + "model.layers.71.input_layernorm.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.71.mlp.down_proj.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.71.mlp.gate_proj.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.71.mlp.up_proj.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.71.post_attention_layernorm.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.71.self_attn.k_proj.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.71.self_attn.o_proj.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.71.self_attn.q_proj.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.71.self_attn.v_proj.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.72.input_layernorm.weight": "pytorch_model-00056-of-00062.bin", + "model.layers.72.mlp.down_proj.weight": "pytorch_model-00056-of-00062.bin", + "model.layers.72.mlp.gate_proj.weight": "pytorch_model-00056-of-00062.bin", + "model.layers.72.mlp.up_proj.weight": "pytorch_model-00056-of-00062.bin", + "model.layers.72.post_attention_layernorm.weight": "pytorch_model-00056-of-00062.bin", + "model.layers.72.self_attn.k_proj.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.72.self_attn.o_proj.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.72.self_attn.q_proj.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.72.self_attn.v_proj.weight": "pytorch_model-00055-of-00062.bin", + "model.layers.73.input_layernorm.weight": "pytorch_model-00057-of-00062.bin", + "model.layers.73.mlp.down_proj.weight": "pytorch_model-00057-of-00062.bin", + "model.layers.73.mlp.gate_proj.weight": "pytorch_model-00056-of-00062.bin", + "model.layers.73.mlp.up_proj.weight": "pytorch_model-00057-of-00062.bin", + "model.layers.73.post_attention_layernorm.weight": "pytorch_model-00057-of-00062.bin", + "model.layers.73.self_attn.k_proj.weight": "pytorch_model-00056-of-00062.bin", + "model.layers.73.self_attn.o_proj.weight": "pytorch_model-00056-of-00062.bin", + "model.layers.73.self_attn.q_proj.weight": "pytorch_model-00056-of-00062.bin", + "model.layers.73.self_attn.v_proj.weight": "pytorch_model-00056-of-00062.bin", + "model.layers.74.input_layernorm.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.74.mlp.down_proj.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.74.mlp.gate_proj.weight": "pytorch_model-00057-of-00062.bin", + "model.layers.74.mlp.up_proj.weight": "pytorch_model-00057-of-00062.bin", + "model.layers.74.post_attention_layernorm.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.74.self_attn.k_proj.weight": "pytorch_model-00057-of-00062.bin", + "model.layers.74.self_attn.o_proj.weight": "pytorch_model-00057-of-00062.bin", + "model.layers.74.self_attn.q_proj.weight": "pytorch_model-00057-of-00062.bin", + "model.layers.74.self_attn.v_proj.weight": "pytorch_model-00057-of-00062.bin", + "model.layers.75.input_layernorm.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.75.mlp.down_proj.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.75.mlp.gate_proj.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.75.mlp.up_proj.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.75.post_attention_layernorm.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.75.self_attn.k_proj.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.75.self_attn.o_proj.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.75.self_attn.q_proj.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.75.self_attn.v_proj.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.76.input_layernorm.weight": "pytorch_model-00059-of-00062.bin", + "model.layers.76.mlp.down_proj.weight": "pytorch_model-00059-of-00062.bin", + "model.layers.76.mlp.gate_proj.weight": "pytorch_model-00059-of-00062.bin", + "model.layers.76.mlp.up_proj.weight": "pytorch_model-00059-of-00062.bin", + "model.layers.76.post_attention_layernorm.weight": "pytorch_model-00059-of-00062.bin", + "model.layers.76.self_attn.k_proj.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.76.self_attn.o_proj.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.76.self_attn.q_proj.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.76.self_attn.v_proj.weight": "pytorch_model-00058-of-00062.bin", + "model.layers.77.input_layernorm.weight": "pytorch_model-00060-of-00062.bin", + "model.layers.77.mlp.down_proj.weight": "pytorch_model-00060-of-00062.bin", + "model.layers.77.mlp.gate_proj.weight": "pytorch_model-00059-of-00062.bin", + "model.layers.77.mlp.up_proj.weight": "pytorch_model-00060-of-00062.bin", + "model.layers.77.post_attention_layernorm.weight": "pytorch_model-00060-of-00062.bin", + "model.layers.77.self_attn.k_proj.weight": "pytorch_model-00059-of-00062.bin", + "model.layers.77.self_attn.o_proj.weight": "pytorch_model-00059-of-00062.bin", + "model.layers.77.self_attn.q_proj.weight": "pytorch_model-00059-of-00062.bin", + "model.layers.77.self_attn.v_proj.weight": "pytorch_model-00059-of-00062.bin", + "model.layers.78.input_layernorm.weight": "pytorch_model-00061-of-00062.bin", + "model.layers.78.mlp.down_proj.weight": "pytorch_model-00061-of-00062.bin", + "model.layers.78.mlp.gate_proj.weight": "pytorch_model-00060-of-00062.bin", + "model.layers.78.mlp.up_proj.weight": "pytorch_model-00060-of-00062.bin", + "model.layers.78.post_attention_layernorm.weight": "pytorch_model-00061-of-00062.bin", + "model.layers.78.self_attn.k_proj.weight": "pytorch_model-00060-of-00062.bin", + "model.layers.78.self_attn.o_proj.weight": "pytorch_model-00060-of-00062.bin", + "model.layers.78.self_attn.q_proj.weight": "pytorch_model-00060-of-00062.bin", + "model.layers.78.self_attn.v_proj.weight": "pytorch_model-00060-of-00062.bin", + "model.layers.79.input_layernorm.weight": "pytorch_model-00061-of-00062.bin", + "model.layers.79.mlp.down_proj.weight": "pytorch_model-00061-of-00062.bin", + "model.layers.79.mlp.gate_proj.weight": "pytorch_model-00061-of-00062.bin", + "model.layers.79.mlp.up_proj.weight": "pytorch_model-00061-of-00062.bin", + "model.layers.79.post_attention_layernorm.weight": "pytorch_model-00061-of-00062.bin", + "model.layers.79.self_attn.k_proj.weight": "pytorch_model-00061-of-00062.bin", + "model.layers.79.self_attn.o_proj.weight": "pytorch_model-00061-of-00062.bin", + "model.layers.79.self_attn.q_proj.weight": "pytorch_model-00061-of-00062.bin", + "model.layers.79.self_attn.v_proj.weight": "pytorch_model-00061-of-00062.bin", + "model.layers.8.input_layernorm.weight": "pytorch_model-00008-of-00062.bin", + "model.layers.8.mlp.down_proj.weight": "pytorch_model-00008-of-00062.bin", + "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00008-of-00062.bin", + "model.layers.8.mlp.up_proj.weight": "pytorch_model-00008-of-00062.bin", + "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00008-of-00062.bin", + "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00007-of-00062.bin", + "model.layers.9.input_layernorm.weight": "pytorch_model-00009-of-00062.bin", + "model.layers.9.mlp.down_proj.weight": "pytorch_model-00009-of-00062.bin", + "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00008-of-00062.bin", + "model.layers.9.mlp.up_proj.weight": "pytorch_model-00009-of-00062.bin", + "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00009-of-00062.bin", + "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00008-of-00062.bin", + "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00008-of-00062.bin", + "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00008-of-00062.bin", + "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00008-of-00062.bin", + "model.norm.weight": "pytorch_model-00061-of-00062.bin" + } +}