diff --git a/config.json b/config.json old mode 100644 new mode 100755 index baec789f6cdfa169ff32b825679566f15d605dac..7426c64f8261f653473e45d7b8982f6435018515 --- a/config.json +++ b/config.json @@ -1,5 +1,5 @@ { - "_name_or_path": "/opt/llama3.1/Meta-Llama-3.1-405B-Instruct/", + "_name_or_path": "/opt/zhiyu_hf_ckpts/Llama-3.1-405B-Instruct/", "architectures": [ "LlamaForCausalLM" ], @@ -33,7 +33,7 @@ "rope_theta": 500000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", - "transformers_version": "4.43.4", + "transformers_version": "4.44.0", "use_cache": true, "vocab_size": 128256 } diff --git a/generation_config.json b/generation_config.json old mode 100644 new mode 100755 index 57a143f2aec52fb1e288ab850ad0d13bb74becaf..7769ad8f10361b751d5660ec61d865aa109f1dc6 --- a/generation_config.json +++ b/generation_config.json @@ -8,5 +8,5 @@ ], "temperature": 0.6, "top_p": 0.9, - "transformers_version": "4.43.4" + "transformers_version": "4.44.0" } diff --git a/hf_quant_config.json b/hf_quant_config.json old mode 100644 new mode 100755 index 8157c8ddcaac48c2b7cf4717c636dec9dbd8373a..3b1072ce6584257888b6012f35bd5cc846490f4b --- a/hf_quant_config.json +++ b/hf_quant_config.json @@ -1,10 +1,13 @@ { "producer": { "name": "modelopt", - "version": "0.17.0" + "version": "0.23.0" }, "quantization": { "quant_algo": "FP8", - "kv_cache_quant_algo": null + "kv_cache_quant_algo": "FP8", + "exclude_modules": [ + "lm_head" + ] } } diff --git a/model-00001-of-00086.safetensors b/model-00001-of-00086.safetensors old mode 100644 new mode 100755 index 0deba5632a5b7425e23ee00a753b30894a2afb90..1320828d0487111d420cb74eb463a4a83f89c48d --- a/model-00001-of-00086.safetensors +++ b/model-00001-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:abc799746b86eb40c887f3b66c1f1df1b66c8925dcd6ce372cc21193c6a54fad -size 4773119376 +oid sha256:e11f1350fcf9c91aacb83b2c79d0ccc0ac8150565055a23a8cb3b3bf333668ff +size 4773119568 diff --git a/model-00002-of-00086.safetensors b/model-00002-of-00086.safetensors old mode 100644 new mode 100755 index 5f4443feb2f223919f1064b551349dc6a89ee2be..95de338035bbf5c378f004e87c8affca8b02169f --- a/model-00002-of-00086.safetensors +++ b/model-00002-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3917a62ef49efce3331d1621f6409c5466b17fbe757cc00bdefafd2180dc8236 -size 4932570128 +oid sha256:7a86a804c2320b1f55f2476249a63d928c946cb98faaa248cee6228f1f715ee2 +size 4932570320 diff --git a/model-00003-of-00086.safetensors b/model-00003-of-00086.safetensors old mode 100644 new mode 100755 index 69cb7849d3e78f67e1333f2fba00bb001bc89200..bea15eed8a57cc189fc12038cd42a2348563cc4d --- a/model-00003-of-00086.safetensors +++ b/model-00003-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a46ee529d528b6dfec63878fb7e65398fc8887ce3249504031757193c5cde7d2 -size 4630646976 +oid sha256:1e28d5899c266dd2fa07c9deef8e76695650746bf5c905855e4255b6ce72d017 +size 4630647368 diff --git a/model-00004-of-00086.safetensors b/model-00004-of-00086.safetensors old mode 100644 new mode 100755 index 2c1ff5ec6b6bfafe3a67b3d674228caa85eca907..5e30b8ce8acde41638567d561f7970c53fe8d693 --- a/model-00004-of-00086.safetensors +++ b/model-00004-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7db50ebf9f0f94dede49b1b4f100cf93766f512fb3d457d85eec3f629e50319b -size 4932570128 +oid sha256:2d9b75e900c3b270c8cd46952191f97e27edf3d197e0a9b3e573d87c7c744b91 +size 4932570320 diff --git a/model-00005-of-00086.safetensors b/model-00005-of-00086.safetensors old mode 100644 new mode 100755 index b166b2df3a9b309b2e32a201ff3c901ee0363f51..c7a1c67b25ab301be89e4a20abdaeb0e3888bfd8 --- a/model-00005-of-00086.safetensors +++ b/model-00005-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a377187b3de8e1c296b21fefb4b0e88fa9e7d0eb4cb94f2862065384c3cccb5 -size 4630646976 +oid sha256:9cb6b4442b20f76085ac5bcb5431cb59acfb0bf03fb5cb59cc050a691e77534a +size 4630647368 diff --git a/model-00006-of-00086.safetensors b/model-00006-of-00086.safetensors old mode 100644 new mode 100755 index cd8dd1bac41227dc7acf88bb5a959592f33a6d22..825d5c1892a875a7d6e25d323bb0a9ad35f6d202 --- a/model-00006-of-00086.safetensors +++ b/model-00006-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cab4217ce8fa627e56f1c60cf4e6184c495c0be8fa18eb960cedd7a9788daeed -size 4932570128 +oid sha256:dbc87e74a9d4cfbc4a668ab36ce6a051789496de51bb3e5872bf7e7ae65e793b +size 4932570320 diff --git a/model-00007-of-00086.safetensors b/model-00007-of-00086.safetensors old mode 100644 new mode 100755 index 547eb5d4a0680930d1caa0df532261d735a1d20b..6ab57d908b2941c587c03f9b3171c239b26cc3c8 --- a/model-00007-of-00086.safetensors +++ b/model-00007-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd2a4ad3ee10bede80e5e8f3340f4a063a0d45f02a5c0c2cd77ed3708ed89f30 -size 4630646976 +oid sha256:e289de1f866097d30b4573f1ae7e31ce607e7597022e6661dd4e5e701e584726 +size 4630647368 diff --git a/model-00008-of-00086.safetensors b/model-00008-of-00086.safetensors old mode 100644 new mode 100755 index e1f2dcbecb3e165c14fbd1b80824ce287d953540..64625fca7221eb9a6ed95f16f2d8d6c4cb9a85ef --- a/model-00008-of-00086.safetensors +++ b/model-00008-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:341eeb2f36f63dcd86cd081bd6b1de8355a52eecd3facddb355b5b9d2576d3dc -size 4932570144 +oid sha256:41808a194d3de085e2a570b9e94923ca87f95143b48bfa994cc3ac3fb7ec2f64 +size 4932570344 diff --git a/model-00009-of-00086.safetensors b/model-00009-of-00086.safetensors old mode 100644 new mode 100755 index 53f859fdea12e0d307df8d248d2a63af7596fb4c..38127921c160ad70598c47b0499a34a422ed10c2 --- a/model-00009-of-00086.safetensors +++ b/model-00009-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b32ee8cba42ec8696b9de04a206aca20d5ef7f2c77b56e804162a5c0cb4e938 -size 4630647016 +oid sha256:d0d24023bf529049c78cd2da0a5f526f217c60a417fb6420e6cf03908b383785 +size 4630647408 diff --git a/model-00010-of-00086.safetensors b/model-00010-of-00086.safetensors old mode 100644 new mode 100755 index 4cfa146a6a7bcb29401c2db216ae9a61aa0140a4..f6f50102c6302e76c3620e33a4db7df04f274a54 --- a/model-00010-of-00086.safetensors +++ b/model-00010-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4413283a100aa3138b9fddd530c585fe3a881cce69df617ebfbc6808d71a1d62 -size 4932570160 +oid sha256:84f06d8d2e3b3caa53ed56a9810bf81fdd4f7b008be41991d9b3b898f3e236c3 +size 4932570352 diff --git a/model-00011-of-00086.safetensors b/model-00011-of-00086.safetensors old mode 100644 new mode 100755 index 50b44c0e9487006cb446a5fdac96c33d5daad326..1d95dc8443cc297f14fe3e2c7cc4f9282bb78b44 --- a/model-00011-of-00086.safetensors +++ b/model-00011-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0f3df58f7599098b9d444c9e0ed1119509adcb11fba4aae66a2bd5c4ef336c32 -size 4630647016 +oid sha256:4ac6b7c2f2a5ac453821b58d711b571cc764b95206b6f4ef3f20c2c40fc59806 +size 4630647408 diff --git a/model-00012-of-00086.safetensors b/model-00012-of-00086.safetensors old mode 100644 new mode 100755 index 05f5c8d7612d0fdf01601056367375717747b454..7e9356d6bd5c81b19e6720fe6ee8b9a051674251 --- a/model-00012-of-00086.safetensors +++ b/model-00012-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d7e2cf786c5195d3a59b33faf8e73fb39bb691957fab0d9811e35517e998f086 -size 4932570160 +oid sha256:7f3fc3e1cb98f8ad5719a14573ae6223df6da13530bbcf00df8b6667102e7c8a +size 4932570352 diff --git a/model-00013-of-00086.safetensors b/model-00013-of-00086.safetensors old mode 100644 new mode 100755 index b47dd78268d640d11a592f951b80f07323e65580..018aed74b1076045e41115793cf215701ce1c988 --- a/model-00013-of-00086.safetensors +++ b/model-00013-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d729c1da0bc71319f56a0dbafc6d2d7f1a0bf25f3d47dc69298a32bdcfa55b4f -size 4630647016 +oid sha256:038c119c4472d4bee287143edd363a44065d412544694965693fd7f54e4b7112 +size 4630647408 diff --git a/model-00014-of-00086.safetensors b/model-00014-of-00086.safetensors old mode 100644 new mode 100755 index 6d66db7f0ffdfeee5907d5b26a52614b2b9ff8c0..117cc0906dd6d154c27b20b037467a0480f8f2d6 --- a/model-00014-of-00086.safetensors +++ b/model-00014-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:89cefccb0925f1b897097ac951c3c5d546453413d1747e18fb2377753e6da8b9 -size 4932570160 +oid sha256:4d8889571a779f7ac1ba0f396aa84a5ab76f32f71cc618a674a4fc5fbd78d3b6 +size 4932570352 diff --git a/model-00015-of-00086.safetensors b/model-00015-of-00086.safetensors old mode 100644 new mode 100755 index 4571131c46a1c1badc7f0c55f7ed9aa36456b5e1..0e517042c5fe4ada35a2de8c2caaec366ad49eaf --- a/model-00015-of-00086.safetensors +++ b/model-00015-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ab1e9fabbe6a02391b5e614dcfbaf30cabf5c27b973cc3692fc73f17d378d214 -size 4630647016 +oid sha256:b5dc7c3ddfa4959a780080edf132e4595966386dc5fc9db3bd4ddb3a983b6b2b +size 4630647408 diff --git a/model-00016-of-00086.safetensors b/model-00016-of-00086.safetensors old mode 100644 new mode 100755 index 98b552528384dbe6fc20794c6f63273a820c2767..424633a2cf9971d436223ce1c577862e515a2064 --- a/model-00016-of-00086.safetensors +++ b/model-00016-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:99e71eacfd0a418738f1e2e12bbd533058f9c8ef9c7fe6d58315284254abe476 -size 4932570160 +oid sha256:dee055c595a6d29fe7ca1a4350e8fb78605be72fd8c0b79d115fed9b489975f5 +size 4932570352 diff --git a/model-00017-of-00086.safetensors b/model-00017-of-00086.safetensors old mode 100644 new mode 100755 index 17a8cc8c6c00915ba54c15d77c4c8fcc7213b732..0031fb713a60e21dbd6a8da702c069ae1944bf7e --- a/model-00017-of-00086.safetensors +++ b/model-00017-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3bc77d66e6ede3e0e10b225b732d7a52f53f4124eb49201698ba5741404b164a -size 4630647016 +oid sha256:78b3d7c1c9bec08486fd4139d812d06d5925b36006af9dea08b529383ee6982d +size 4630647408 diff --git a/model-00018-of-00086.safetensors b/model-00018-of-00086.safetensors old mode 100644 new mode 100755 index 7e7721b6e62f1db918350a58e44a8cc0ce414593..3e00600c7621ef9223a6caececca79b6958c5556 --- a/model-00018-of-00086.safetensors +++ b/model-00018-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a63590a0766446e17835a4b3d3407688b0c2c795f9eabc09097bdb0274acd42d -size 4932570160 +oid sha256:c76cb2b73d2f0c43ea00d1da5f5d414522ae2ea55702567c896e3da248f249c3 +size 4932570352 diff --git a/model-00019-of-00086.safetensors b/model-00019-of-00086.safetensors old mode 100644 new mode 100755 index b1db7e90be3d3b18c3cdac9468bae32b4645d4b5..02aec661d8d9ac1782cdbaea0ab9f336791655e5 --- a/model-00019-of-00086.safetensors +++ b/model-00019-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:44b7f24cc86576cc9b2ba0b6e9d06f77bfd2d99f34b9a08e139b047f1c916a48 -size 4630647016 +oid sha256:b3262d7108147e44b9f3edf0bbcd3015f8901fe71673039c4011f58556b92349 +size 4630647408 diff --git a/model-00020-of-00086.safetensors b/model-00020-of-00086.safetensors old mode 100644 new mode 100755 index a602d7f98df06e26f7ed85b4ac0bc67a87da0306..9a6cb0a2f7ac98539a090f9f75fa442d4b1651ed --- a/model-00020-of-00086.safetensors +++ b/model-00020-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:cbf3b420b673a88769315d8ce192807bc38dbc849c3ae54567d620aaec93b7ed -size 4932570160 +oid sha256:8609584cf4b09d48cc795ad61ab2f766ec25c15f625a0d77549d413111ada2ac +size 4932570352 diff --git a/model-00021-of-00086.safetensors b/model-00021-of-00086.safetensors old mode 100644 new mode 100755 index d6bafd7fdb4c2c9e0fc7a7b407ea032b1ededf3f..0517e800da4e8d23050dc7b883d2e29d73d04200 --- a/model-00021-of-00086.safetensors +++ b/model-00021-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fa037061ff66d64fa0a3152c32bf2452cac8a87848a99fa50b1b894d6806d9af -size 4630647016 +oid sha256:0000ff0499b9976cc39b7f73a1758803cb6d3cd002bf437088afb31bdab444c3 +size 4630647408 diff --git a/model-00022-of-00086.safetensors b/model-00022-of-00086.safetensors old mode 100644 new mode 100755 index 693b6998932239e191c76f4942547ed4250463f4..280212b2dbedc5fc717da61cfebf5214e62f6cb2 --- a/model-00022-of-00086.safetensors +++ b/model-00022-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:48e9e21b78f808c4726a896d3791a26c787c053bc2182e109ce5b3b2c5e81ca1 -size 4932570160 +oid sha256:4c016cb4a3dd7b52685a8e5810706afb32299a1793e1753f0ee3d81c9abb1893 +size 4932570352 diff --git a/model-00023-of-00086.safetensors b/model-00023-of-00086.safetensors old mode 100644 new mode 100755 index e37c7ac478094bf04c7fbad00154961ef73882be..1130fa9a8f310fa5db7513086fb05fc99618f58d --- a/model-00023-of-00086.safetensors +++ b/model-00023-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:325fc87cb641c0f8632170b4d2ecbb8a790fa42cf4d52c416d86c5389049b949 -size 4630647016 +oid sha256:e995dbc418c34dcf63485ac7ceca19b24ea298c7f199073381c5efc71cdc3091 +size 4630647408 diff --git a/model-00024-of-00086.safetensors b/model-00024-of-00086.safetensors old mode 100644 new mode 100755 index faeb1f429ff0fe3d99d4a10e1aec7e482924466a..d32f7f09122856924c5a08b5224ddab5882da866 --- a/model-00024-of-00086.safetensors +++ b/model-00024-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c8faaeba1c8166d9ab926d84a05672e6adeb687e03b20724e929945d51f01ca9 -size 4932570160 +oid sha256:1f35c16c9ef3a60e0efaf1ab477da83514ca7cac4ba56ead1722cd90222129b6 +size 4932570352 diff --git a/model-00025-of-00086.safetensors b/model-00025-of-00086.safetensors old mode 100644 new mode 100755 index e36b672b65107acd3d5533aeaf08f0fe9ab62949..9bdfc471f41ab83b67e885aa749db072bcae21f1 --- a/model-00025-of-00086.safetensors +++ b/model-00025-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4709e6e48efaf8c73933f06ac7e90f689ef07b80811707291e241499139dca17 -size 4630647016 +oid sha256:393a510765775377c0511cdc73b65d4bfb275670bdae0ec08189fab84fc6549f +size 4630647408 diff --git a/model-00026-of-00086.safetensors b/model-00026-of-00086.safetensors old mode 100644 new mode 100755 index ed255871e7e4e98f2f71ebcfa313749ce9945918..70e25df75f8ba25797601caf9ab1168226991558 --- a/model-00026-of-00086.safetensors +++ b/model-00026-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d29e50dd7fb1d407180bf86bc641a216a77b4be74af130bb930449e3c634347 -size 4932570160 +oid sha256:156e113e9dff968246466f63649fe11def50b5cd4946bf5b7c136640a96ed531 +size 4932570352 diff --git a/model-00027-of-00086.safetensors b/model-00027-of-00086.safetensors old mode 100644 new mode 100755 index 7d748bd6e97bf529f7d1cf28a164480c7b395bd9..21d8bf13128fd6ce2d693369f865b31b34a9b462 --- a/model-00027-of-00086.safetensors +++ b/model-00027-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6175ba8c92031a46d81ab5b815724110f155fa6efdf00e162e58b5b92dc04a5f -size 4630647016 +oid sha256:7e6c093a2aa3ae41785d4228710c926784adde60f82c4e3f016456716512703f +size 4630647408 diff --git a/model-00028-of-00086.safetensors b/model-00028-of-00086.safetensors old mode 100644 new mode 100755 index 240228c57078b372e59b90940f87d007a124dfb9..df409c8d46343a2c056a530e6cbc24a90cda5602 --- a/model-00028-of-00086.safetensors +++ b/model-00028-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0b08da7afe4edfc8863829ece066ad2306d31e9190cae1f7498e0ada93a0efdc -size 4932570160 +oid sha256:e6303f56439639b9303bfc865ad9e057e47aa9012b079c751bddb03b8e0c4757 +size 4932570352 diff --git a/model-00029-of-00086.safetensors b/model-00029-of-00086.safetensors old mode 100644 new mode 100755 index d18099fa61632d3ff0ce0511e186dab3852013bb..8d7629c707eec554c9c82eb6a4efb3b91541e128 --- a/model-00029-of-00086.safetensors +++ b/model-00029-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:375bc82d48d042ab84f72e6c986b954b865286b15b5f364b0c23c1526d7105ab -size 4630647016 +oid sha256:4ef657456afecc5818846f5b31b6d6534cf5c7891e52cfb9cb1d5071a0263e68 +size 4630647408 diff --git a/model-00030-of-00086.safetensors b/model-00030-of-00086.safetensors old mode 100644 new mode 100755 index 34790a6c30de58a3780313c206c89875a6c223ab..0bc440d844ff8a893bec2c9fb42f1ca7e11d78ed --- a/model-00030-of-00086.safetensors +++ b/model-00030-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:abdea2093d079a3f01f2dc2815412fb8fb58b3932448c10a5dbef3325b71d4c4 -size 4932570160 +oid sha256:d05543cffba02c72563b3d48778e503b184b14d8baeb97d6ee12dcd6fdc87031 +size 4932570352 diff --git a/model-00031-of-00086.safetensors b/model-00031-of-00086.safetensors old mode 100644 new mode 100755 index 6ed7731cba1df8037f2538a80fb0499f3b2c6fd9..ab0952eb10f1afde1f3b3260cfec5d17ecc964ca --- a/model-00031-of-00086.safetensors +++ b/model-00031-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f83ffc139abca27d301931d476347e484375f4100e0d98ca68b8cb86e52a65d -size 4630647016 +oid sha256:dd02643b1c6bfe4f940798ef3b7fe33abe7f3c1512c4f78455e662430b6da3ef +size 4630647408 diff --git a/model-00032-of-00086.safetensors b/model-00032-of-00086.safetensors old mode 100644 new mode 100755 index 83cbebd0b9f8760c4a824700b7056960ec5b722e..503369b08fcb64634631f7e35ede80852174dde8 --- a/model-00032-of-00086.safetensors +++ b/model-00032-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:71e78f25c7b2d4db79f65373d597970173872b9a3808401b61d3b28701166244 -size 4932570160 +oid sha256:ce6828f0efc4fd8c38346aefbc11fb4febe9ee273bcca717d7e7013437573274 +size 4932570352 diff --git a/model-00033-of-00086.safetensors b/model-00033-of-00086.safetensors old mode 100644 new mode 100755 index 5b1d04038c4da4f47391928c1c7da3f716d3e0e8..955a6d27456caec83b68cf6a66b05414649f2e3d --- a/model-00033-of-00086.safetensors +++ b/model-00033-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fbb68bc5ef9951cf2e0182a0b5cbc3e3773de794614cd869cda015fd4c386678 -size 4630647016 +oid sha256:568e41cb95fcdbb867bfd0505ee91f081ab3ef4ed1a76257cb2d9f8a695d85ef +size 4630647408 diff --git a/model-00034-of-00086.safetensors b/model-00034-of-00086.safetensors old mode 100644 new mode 100755 index 35172d3390bfed82a376a99b0e4a43af2e10cedd..9aa650823d02d44158163950fbaca9b837c06721 --- a/model-00034-of-00086.safetensors +++ b/model-00034-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:964c58979eb119eb30d052da0251c14138629d12522cb664475c354cff1852d9 -size 4932570160 +oid sha256:a5b97eef1afad1cd653091a2869b24cdee4601a696ab6351ebfe4b5f5812cac2 +size 4932570352 diff --git a/model-00035-of-00086.safetensors b/model-00035-of-00086.safetensors old mode 100644 new mode 100755 index 52c7edd713324886fcfd2b57a3e2c732a19059ec..85c8def50389015cce993b33b550808a3541e6a6 --- a/model-00035-of-00086.safetensors +++ b/model-00035-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e2b5aa300bab7e85a480d0a7f847816a458398da82a4f8a5e512b19564809e3a -size 4630647016 +oid sha256:6c0a98b9a4a15f2e82f0ddb4fcf32181b7a52f250882099a24ea24dfb3f8e2d1 +size 4630647408 diff --git a/model-00036-of-00086.safetensors b/model-00036-of-00086.safetensors old mode 100644 new mode 100755 index 39cbb17ffc0e17fa89d989589765d87cb3c4c922..40227ddc8018c31f1dc375815d5738bd82b7e3b4 --- a/model-00036-of-00086.safetensors +++ b/model-00036-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd9195bea0a5992ddce286acfcc2ee9de6859ae8faaaf9176fcb854c89803248 -size 4932570160 +oid sha256:ccbaf5750e035b19b49f5de870e9e286420cb10feada2d9ad9c8e317a084b611 +size 4932570352 diff --git a/model-00037-of-00086.safetensors b/model-00037-of-00086.safetensors old mode 100644 new mode 100755 index 808fb3be3afa86523e6ff0e7f2c6b619016b048f..ab025c31a60bc51e01b9e19e218ca022259e97cf --- a/model-00037-of-00086.safetensors +++ b/model-00037-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:67915574bf7aad4ee6299f33e7706ab40e0437e3014ea6c7a42eadcf582099cc -size 4630647016 +oid sha256:588a38ee00fd720e8004c3b4037ef068a4f62b63e9a559fc2f065e6d875fe917 +size 4630647408 diff --git a/model-00038-of-00086.safetensors b/model-00038-of-00086.safetensors old mode 100644 new mode 100755 index 80d92985f9f2cc678c683e2a6f8ac7cf381a63b9..f830ec9c6aa7bcdf44d0799a71b1f30ea10bc38e --- a/model-00038-of-00086.safetensors +++ b/model-00038-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:dd5cdcddffcd6b4dde66e5fe7070e8a8447981471baf345fa1db79b080d8e521 -size 4932570160 +oid sha256:11ab9d20141486cc788cb7913961f8bc7c4e30cbc3294b7d12df3b7d176b070e +size 4932570352 diff --git a/model-00039-of-00086.safetensors b/model-00039-of-00086.safetensors old mode 100644 new mode 100755 index a54b3fbd6b7a2983f3339869820eb5eee1ff837c..f7803357ea93698de3a1e6ee96c06365b7e0a756 --- a/model-00039-of-00086.safetensors +++ b/model-00039-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:027128272cf78c68ce3261cd8f7834f3e52e1d12bd7793fe2f3f10e33f629d3a -size 4630647016 +oid sha256:43138bc5e05771e996aa41e2a3c30a168da7743aba6a303cfcc9f3e37b78cb34 +size 4630647408 diff --git a/model-00040-of-00086.safetensors b/model-00040-of-00086.safetensors old mode 100644 new mode 100755 index 6f63c0881ada9de9c080303ea5475b8d4cccd535..ddf4d28d96eac4952bea0e63c4a524162440c0c9 --- a/model-00040-of-00086.safetensors +++ b/model-00040-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:33fd2044a3e4a6f5557953fe7683d8fdbd37a83d56568e5aacf2435d7e8d3a5b -size 4932570160 +oid sha256:64851d0838ece22a5b8c094ae3ad7c411cdf508f0bc35bc97050c814296fe4bc +size 4932570352 diff --git a/model-00041-of-00086.safetensors b/model-00041-of-00086.safetensors old mode 100644 new mode 100755 index 415562fab3247703fa6e8de4f0d464149b3c5b51..2543e104df85e4695e51cb35a38c4267e783f10a --- a/model-00041-of-00086.safetensors +++ b/model-00041-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:94fb40bbc0a21692d63a2b59a3648cf5d380232e53f38860a28f5d907fc4c6a9 -size 4630647016 +oid sha256:fccf8d756f081c64f3165cc808902a255c58e4c5b0961d43f12ecd935971ea4d +size 4630647408 diff --git a/model-00042-of-00086.safetensors b/model-00042-of-00086.safetensors old mode 100644 new mode 100755 index 8a74a7c4f0897bd93cbddefb477be2a1289feaa7..500346be0d2952528c473042abe8438cf1cb68dc --- a/model-00042-of-00086.safetensors +++ b/model-00042-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bc8e474d63ebeb8a0704470bd45a83c4d6d3214e189b250e0c010ebcc2363ec1 -size 4932570160 +oid sha256:bba06c8c26b2680d4faaa5b14337ea732159685dfd94497873e777f0b62134c2 +size 4932570352 diff --git a/model-00043-of-00086.safetensors b/model-00043-of-00086.safetensors old mode 100644 new mode 100755 index e752921a5f47840e368f22805d3e1577f57eb02b..d6833264b87785b5550a2dc218c7231ca81f6152 --- a/model-00043-of-00086.safetensors +++ b/model-00043-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:42ddf7231acf5bc45f939b6aeba554cb6fbe4753f3ac815df5c9b50f06484e11 -size 4630647016 +oid sha256:48f642a7140bb650a29cd63de0ba06600a03a040be89a186dda6d321a1ae04a4 +size 4630647408 diff --git a/model-00044-of-00086.safetensors b/model-00044-of-00086.safetensors old mode 100644 new mode 100755 index 42a4ac06f861a86d1b7d11aba6a5e7cca0f5e827..8ba2a5c5e813bc675bbf310f12ba608949e0f1a2 --- a/model-00044-of-00086.safetensors +++ b/model-00044-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:25b2f8a47a783918462e4ba4547bf98645dad8203fd22b5f78b773d82d4bba3d -size 4932570160 +oid sha256:581b3c3e822b4b9c51b2fb7f33c523fafaadaa883cab2a0db77a633bcaa18f2e +size 4932570352 diff --git a/model-00045-of-00086.safetensors b/model-00045-of-00086.safetensors old mode 100644 new mode 100755 index bbcffccafd49af56f15641f0377193ec6ca46ba5..57bebc7231f04ff3f90681e232e2db34c2c4144c --- a/model-00045-of-00086.safetensors +++ b/model-00045-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:fadbc818e2f55c0cc1750e64502436aa7965682fe80db297a83861eda1bcd2a4 -size 4630647016 +oid sha256:e7b998c7f79ecc3e35c44a826fc852f668fdba321fcf0bc425f76c7d2268f346 +size 4630647408 diff --git a/model-00046-of-00086.safetensors b/model-00046-of-00086.safetensors old mode 100644 new mode 100755 index d447c522a63b920c6f965787c7862ff7141dec9f..833d7b79a5275ca9398dae0d0b811863d64746c0 --- a/model-00046-of-00086.safetensors +++ b/model-00046-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0c02fe1c87881687b3b0927cfbc1ec41edf3c4f7cf87f4db7661a3f89fc7ebc0 -size 4932570160 +oid sha256:9ae654baf986eecbcdd32e09a4e69d233acdbf9ba0dfa7c36cc47465f633b4db +size 4932570352 diff --git a/model-00047-of-00086.safetensors b/model-00047-of-00086.safetensors old mode 100644 new mode 100755 index 2dde8d8c48a86f1db8f01123b8cda796f87bcf3e..0862fb66c44e4bee92dd5da6eb3b2ef26248fc66 --- a/model-00047-of-00086.safetensors +++ b/model-00047-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:21f03e7a296a843b26247ec41217d6d5ce9e6abcc251fbeae5d66519eeb8cb4b -size 4630647016 +oid sha256:58063a98746e732d6b191ecb56943cffe85b5717278f5689c85aece01c15cfdf +size 4630647408 diff --git a/model-00048-of-00086.safetensors b/model-00048-of-00086.safetensors old mode 100644 new mode 100755 index f46fb2ef9945f30267f9b3b784f8c5d724bddf4e..e972f363374f1e2c86e71fa37ae6680dce40b097 --- a/model-00048-of-00086.safetensors +++ b/model-00048-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d59464b9c95486a4fb27685a877e7679c39693fb16b11a0fac114f5ce2a2d80c -size 4932570160 +oid sha256:45b2c1169f6060661d077091fcda24a6fd1cc52c0eba1d5f938d4579b1ae13f2 +size 4932570352 diff --git a/model-00049-of-00086.safetensors b/model-00049-of-00086.safetensors old mode 100644 new mode 100755 index 6785f26f230cb4e17b5dbe6d2ff03242ae946004..29ef3fe4dda6900b6dbf8cc9343ee181cd69563a --- a/model-00049-of-00086.safetensors +++ b/model-00049-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d8ab572e4d3bb25764fde3e732806e94e784c28fcaa9230ccd07a7b190969cfd -size 4630647016 +oid sha256:48d48f4a18ca4029ba568ebf66623f0cce5da126a7e67b959cf1b159d653975d +size 4630647408 diff --git a/model-00050-of-00086.safetensors b/model-00050-of-00086.safetensors old mode 100644 new mode 100755 index 3f2594a5c439b4b8c54fa3888b49ce5803709433..4a6ac9b84c6ded8fc74f8f5fb46c91396c205abc --- a/model-00050-of-00086.safetensors +++ b/model-00050-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:db5753b295e9381ad6f7903cf7adc1ec6e07f4f59bcd71ab074ecb5647b3ac6f -size 4932570160 +oid sha256:87b7cbbbeb2f51e4b90dfb46b18005876f3d3a892b662f52a6828c731d4ca271 +size 4932570352 diff --git a/model-00051-of-00086.safetensors b/model-00051-of-00086.safetensors old mode 100644 new mode 100755 index 9b4331511200daab0ae05e604a0d14ada3523288..934c348132238ad4aa8e4a60facdb9d1147e6df4 --- a/model-00051-of-00086.safetensors +++ b/model-00051-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2dd55f2cd5bf927133bc5d18c5ebda6defa4983448491ba8d907e4f557e9d2ab -size 4630647016 +oid sha256:1beac2a81866a1beb0e039185ed860eee5d34ee989ed2d9c3075e3c621df5e50 +size 4630647408 diff --git a/model-00052-of-00086.safetensors b/model-00052-of-00086.safetensors old mode 100644 new mode 100755 index c72356e525e5b5501bb920f89041909d6139fce7..03f0254e32f61c08eb53ee18a8cfdb5b9f21a658 --- a/model-00052-of-00086.safetensors +++ b/model-00052-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:357596d67c2d5d3223d1a26277775b9ca718c5698ea0eafba85bf12e60175b7f -size 4932570160 +oid sha256:aaa99e3aa5ad43fbe6d3d4a91ea90793dea110b7664e838d15ee96fab14083bf +size 4932570352 diff --git a/model-00053-of-00086.safetensors b/model-00053-of-00086.safetensors old mode 100644 new mode 100755 index af1b366c11c6f990305008009b39105391e0f765..02f0b1a73fe16c462db6eed3d7afcd1545862100 --- a/model-00053-of-00086.safetensors +++ b/model-00053-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8b7acce89e3911ce01b116cc0d0de4c35f6ee8ab0d79491f05b647f66719d881 -size 4630647016 +oid sha256:8c27505b918f8045b68ed210c43d79e0bcc9fcf3262fef5837562b1817ef645b +size 4630647408 diff --git a/model-00054-of-00086.safetensors b/model-00054-of-00086.safetensors old mode 100644 new mode 100755 index a6aef2d1078d159d5aa356e19fc4fdaa74031211..641f83159448a7792c95a796ffba4b24efc4f98a --- a/model-00054-of-00086.safetensors +++ b/model-00054-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a06caaea84733c021bd720ca791b117394df50b568d10fe8b81b510059bfe067 -size 4932570160 +oid sha256:93c9df1223c62952b293d949d71b7eb473342b4110e1c38540f315577cca3627 +size 4932570352 diff --git a/model-00055-of-00086.safetensors b/model-00055-of-00086.safetensors old mode 100644 new mode 100755 index d35f99ad52341a0d6cb5ae369879ae41ea2dff6f..8e423ed0273afa82cc941dc8f37c2efc3217bcfc --- a/model-00055-of-00086.safetensors +++ b/model-00055-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:521ae77b8ab363add741c7d5f985d709d08ca606d09ff7147babc1712f59722c -size 4630647016 +oid sha256:31afbcb6b6f6e36cf46f605c1d0bcca50391b64dfe99f6c6a39231919cb577e7 +size 4630647408 diff --git a/model-00056-of-00086.safetensors b/model-00056-of-00086.safetensors old mode 100644 new mode 100755 index eaffd1dba0e14450e93e07869d6829a45b37ad2f..b8b65ef242b2a6ec4e66d1991d5c57d83bb77a9f --- a/model-00056-of-00086.safetensors +++ b/model-00056-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1260ee9956bfe9c67a3a7e5138122a7c47fcda1fad23629689137f3f4ac098d9 -size 4932570160 +oid sha256:2daa823436114cb6922b6b5f620e2796bed59b731eac6ab1ec9a2b73ee49039f +size 4932570352 diff --git a/model-00057-of-00086.safetensors b/model-00057-of-00086.safetensors old mode 100644 new mode 100755 index 8b382147989a800116b4cbf799e3b9a4e8afe72e..8cd03aa8b6f51b65efd84d5187ffc6e7a8bf1c1d --- a/model-00057-of-00086.safetensors +++ b/model-00057-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:492b7aec58f7857a6880362bdec1ba2d07bfc8e5b4ee8959b0a7b21378b33bd3 -size 4630647016 +oid sha256:7ad5353316b8f233593a29ca44aa3a5db295ea9d376ec2275213c037f451ef55 +size 4630647408 diff --git a/model-00058-of-00086.safetensors b/model-00058-of-00086.safetensors old mode 100644 new mode 100755 index 2f3b804700b05ef6a1b810342d16d320c5066c8c..1d61f0ba741bb1e51c05b6a0fb080592526b2b20 --- a/model-00058-of-00086.safetensors +++ b/model-00058-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73b9a221ad00c1207a3752c1e8d25eacc621762d74942ebdbb07fe43089b18d0 -size 4932570160 +oid sha256:05f0b0c60d534ab7c156930386fb61b62944e3d82b63fce3244220128be4b540 +size 4932570352 diff --git a/model-00059-of-00086.safetensors b/model-00059-of-00086.safetensors old mode 100644 new mode 100755 index a895f89c4b8865bb8c591117abce37958d1f581c..8a377fb56642aef53dbd5809dc3e4da8c3aed003 --- a/model-00059-of-00086.safetensors +++ b/model-00059-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8eaf3290ff0ecd46b546e4c50cef1b7dd71fe552d178f096459bf09c03fae2b -size 4630647016 +oid sha256:c6f9ba897d1faa8b6427a798ab198a3d3f7675cd4f44241fa4fd0cb3d3d5ccfc +size 4630647408 diff --git a/model-00060-of-00086.safetensors b/model-00060-of-00086.safetensors old mode 100644 new mode 100755 index ea8dd0a3cc2baa70e7f479458b665d75c205e40f..50a8b7c3d5ad18d7f67afa81ec01b8f7a6666bb3 --- a/model-00060-of-00086.safetensors +++ b/model-00060-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96a738f61adb669bf922e8f3fc0f3ec8ad70463b3f3375e1df618f965b63655c -size 4932570160 +oid sha256:8808a949f3083ba029eeeda71a257674d83e6a77935008f262ccad3b7e5f3f64 +size 4932570352 diff --git a/model-00061-of-00086.safetensors b/model-00061-of-00086.safetensors old mode 100644 new mode 100755 index 27a66b894eae6fdef35ce2385eb4444e1cb7a476..0cb565f19525147568707277207f939a63677130 --- a/model-00061-of-00086.safetensors +++ b/model-00061-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:653c76c5379bf742be8fd1858191aa3cefb7b7ea7a7e2de5290a7021b79def84 -size 4630647016 +oid sha256:a90fa59790fbcca6ae69c2c0d2e7cf184fbe28da8529a6bfcb236c3839b6af76 +size 4630647408 diff --git a/model-00062-of-00086.safetensors b/model-00062-of-00086.safetensors old mode 100644 new mode 100755 index 140eb44d5836f1adf850d8ed5124c9167b114e7f..f59d44e8ea2e39feb88d139b24d5f7a76af0201c --- a/model-00062-of-00086.safetensors +++ b/model-00062-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6d09e5ad0a811aaaceb3a33c8c7dab7cbb681a7452097735c51b4ca49a52dc4d -size 4932570160 +oid sha256:8ee567ef9eb34df9aa10109e623c627ebb3511229d2039393320e2aaefbab17b +size 4932570352 diff --git a/model-00063-of-00086.safetensors b/model-00063-of-00086.safetensors old mode 100644 new mode 100755 index 89e48d33666d6143c11db5ced3f5d9fdf28f7138..7acbddafa603bee67cab2b2ad50aa1fa63efe7cd --- a/model-00063-of-00086.safetensors +++ b/model-00063-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8352d626a11e1e675be89f905f42cbb3793cedfe27e13108de85857a4c37bf8b -size 4630647016 +oid sha256:abfb877f9ec8c702d58620b4cfb34fb9c3b06c61fdfb3b6d5a0c11b7268f15a7 +size 4630647408 diff --git a/model-00064-of-00086.safetensors b/model-00064-of-00086.safetensors old mode 100644 new mode 100755 index 23b79e9523f9fd934f02f2c2702d7f5f92024522..e5f0c2a5ba3ed18d07b2f9e99d6a5babe40d124e --- a/model-00064-of-00086.safetensors +++ b/model-00064-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:117e1b8d7ac6b7c8d609fddf5cb2d9e1d55e96e07e5327d28bc3ff44daafe23c -size 4932570160 +oid sha256:a634c4d34d23d71a7c8fe6b0a6777057cfe3205bc6c06ac262ab2136f690078c +size 4932570352 diff --git a/model-00065-of-00086.safetensors b/model-00065-of-00086.safetensors old mode 100644 new mode 100755 index d553834c901031536e2a8ea4cb0fa0dca95a580c..0ac19a1151db84e27161404be7602c4cf7dfc81a --- a/model-00065-of-00086.safetensors +++ b/model-00065-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca48744910e107cb36e843d456a55632b9cc2d5bf81ba3eb4937591ebdac9741 -size 4630647016 +oid sha256:5ec97ee406b8ad00289ba8de106a7a68f86c42582670000fae0440c693c5334a +size 4630647408 diff --git a/model-00066-of-00086.safetensors b/model-00066-of-00086.safetensors old mode 100644 new mode 100755 index 24666cd1ce62f5e6ee9a1152ed9c1668b8e42dd2..cd8de1ba927eb27a515fc526354b4592c8e2b4d0 --- a/model-00066-of-00086.safetensors +++ b/model-00066-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1523b823e45513fef3f42b2ed85cad16fd50777fda884bd0cda333bffd5f70c0 -size 4932570160 +oid sha256:b6e6dd2649a5761289ebffb326ee82e53b2661bafaa2bc78e0023f39929e3682 +size 4932570352 diff --git a/model-00067-of-00086.safetensors b/model-00067-of-00086.safetensors old mode 100644 new mode 100755 index 6a88229aecaea0c07fb5a558889227d4ac5b58db..6a5a093fa59e208a7192e5da6052bb18a910832a --- a/model-00067-of-00086.safetensors +++ b/model-00067-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6f3fdfbad44c667366ecf5d60a4db2a7f641c6c7ec91a52295dcaa3161a2881e -size 4630647016 +oid sha256:d6d5c643758e96fa8a4a49e602aa5a5ad0331556eef093c0710adfda71c8d475 +size 4630647408 diff --git a/model-00068-of-00086.safetensors b/model-00068-of-00086.safetensors old mode 100644 new mode 100755 index d84762ad85dee9192c287c765b23d32822ba52dc..099a54f6da421a5e6d39b72a9bdf601e82355c06 --- a/model-00068-of-00086.safetensors +++ b/model-00068-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:39306dc1f7bca8d06c5069bcae704839d2edb044389a76faed95014ce272928c -size 4932570176 +oid sha256:d25ba4b80245de5c52a0381f975627dce68b08145423712731f0ef29a8f41fae +size 4932570376 diff --git a/model-00069-of-00086.safetensors b/model-00069-of-00086.safetensors old mode 100644 new mode 100755 index 46027b901326fb2067bfd40ebd66f5f13b262be7..4538413dabfd0f60c36dd255eae161118ba5bb73 --- a/model-00069-of-00086.safetensors +++ b/model-00069-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0465b8741cb26bce90e0d6942f02d9b7708b6aefbb2d315b642cf5e4e1aa1641 -size 4630647056 +oid sha256:aea05e78403652a584a68d383f566469efc1cb493514a059df82b52b3dfbd7ce +size 4630647456 diff --git a/model-00070-of-00086.safetensors b/model-00070-of-00086.safetensors old mode 100644 new mode 100755 index 417bc08b56b79154a788ae44259729403feb5240..554ec0a394f2fcd6d768e4dd70a6eb040aeb2481 --- a/model-00070-of-00086.safetensors +++ b/model-00070-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d9931b7cd25138d286663f34506abf3ae533e94ea59d2fd5f48ce461d94b4460 -size 4932570184 +oid sha256:76597dbd64c9d678321192fa1e0790c95510a63041921ec16f1994963c27321f +size 4932570384 diff --git a/model-00071-of-00086.safetensors b/model-00071-of-00086.safetensors old mode 100644 new mode 100755 index 0583dfb4516d8b5a6c2f0e98a6c095c985e4e799..c8d2c99a5ccb9569fc429637f17c057194e041e5 --- a/model-00071-of-00086.safetensors +++ b/model-00071-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d7ded6919d648e84e502196ffdb05374665352c54cb86208b00ff07e62f60c27 -size 4630647056 +oid sha256:51a56d8273b68348e9242c381ace97d2d0eadbc0eec46e75d0e76510f6d07fe3 +size 4630647456 diff --git a/model-00072-of-00086.safetensors b/model-00072-of-00086.safetensors old mode 100644 new mode 100755 index de9700be1618e29402a942fed58f5afe4a6d16f0..51df4228fe03fc7fe2a631a13eba5cbaede4c9f0 --- a/model-00072-of-00086.safetensors +++ b/model-00072-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5c2230487700317f73080ddaf018492c56be7f09c7d5661d6f7878f1e45df6c9 -size 4932570184 +oid sha256:bac2537aa69ca35c985538a35af961cefcef6b39abf3b9f247c1d43e408ea18d +size 4932570384 diff --git a/model-00073-of-00086.safetensors b/model-00073-of-00086.safetensors old mode 100644 new mode 100755 index 62fe6f3b8a4d458bd0df953379e8b35d00a019fe..819182c024be4670023f920f2f53419ed3c5d1c5 --- a/model-00073-of-00086.safetensors +++ b/model-00073-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b807dbc63fd28822e2db38bb492b5fd98846f6196f2e45db768f967c85182b6d -size 4630647056 +oid sha256:c69174cdb77cd2487f4c744f718606af712689987f268e0bf39eedf263db80b2 +size 4630647456 diff --git a/model-00074-of-00086.safetensors b/model-00074-of-00086.safetensors old mode 100644 new mode 100755 index d987ab5e1f8ef900aac03ac2933b94e7d1145f5d..f416d92a7e7a7701f4b7122e6cf68b540b0f2c4e --- a/model-00074-of-00086.safetensors +++ b/model-00074-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2296637c5aa8461c3a8650e63f373af5575a840c25adaf88ca742a27896f7f4b -size 4932570184 +oid sha256:70c7e97e4577a023dc030ce76bb3cdfaf4cb4511ce726ae8bebf5fb9a080af79 +size 4932570384 diff --git a/model-00075-of-00086.safetensors b/model-00075-of-00086.safetensors old mode 100644 new mode 100755 index 11640a76451b9500bc491ca4198a0f83d51fc298..38abe814e798a41cebf44b5658a96f43cca353e0 --- a/model-00075-of-00086.safetensors +++ b/model-00075-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c016e59b03d4c5163f0edc8a2f1336b06fad7b518d63d2fff4dc2b02b48b0f4 -size 4630647056 +oid sha256:6bd011dde87e4100e8e59cda31697019d423862799244aa5a7362089325fa774 +size 4630647456 diff --git a/model-00076-of-00086.safetensors b/model-00076-of-00086.safetensors old mode 100644 new mode 100755 index 0b7ca59e33eaadb5c62b0651e189a57b71075db5..5541212ee6643cf87fcbe3ade155da68cf919989 --- a/model-00076-of-00086.safetensors +++ b/model-00076-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0a7c5d1cd6880cb126468126fb5134cba75095356409387eea81f0f90f93db6f -size 4932570184 +oid sha256:875cce3596915f75942f375478a315174af8a825f71dadb8d12bd889902d1302 +size 4932570384 diff --git a/model-00077-of-00086.safetensors b/model-00077-of-00086.safetensors old mode 100644 new mode 100755 index b383ced88088a879959f9f4d12ec80e6295b2432..e9982bc03488e0e433f6e4bc43a4677403333a70 --- a/model-00077-of-00086.safetensors +++ b/model-00077-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:725f9a276496c8f7c6fc5b06eb42f73c36595509221641f2d4b848e62d279896 -size 4630647056 +oid sha256:6f2b941a45397927df922a58e180f3e24ed0ee938d338a2200c443d6bdf7e512 +size 4630647456 diff --git a/model-00078-of-00086.safetensors b/model-00078-of-00086.safetensors old mode 100644 new mode 100755 index a351c4a34a509c861e7675522ba4c365adf3da87..fa5f72b22116db9376bca94de667f6ed88252df2 --- a/model-00078-of-00086.safetensors +++ b/model-00078-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ef47e2b85f74fa467bc2107c43671d1e8ff987ab56d382165d373dc6f66705e -size 4932570184 +oid sha256:fe106f1f3b55d05e9457f66ad404b3ab727c8b7b6e25b8d16e3e35031166c007 +size 4932570384 diff --git a/model-00079-of-00086.safetensors b/model-00079-of-00086.safetensors old mode 100644 new mode 100755 index 9a6d5c088a0ca9c7fc5f7090821f0d3650333d27..00016f3e05ebb4d34b14c1d6b31e54a1b0f85079 --- a/model-00079-of-00086.safetensors +++ b/model-00079-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9b81f0962f9a0f7733fd689ef7e01324075b2eb7239f87e0b573bccd23c2c696 -size 4630647056 +oid sha256:973cea3ee5be6c42b5e5253fd1bf83c24788aba8bfb5e7accf7ba580c890e76a +size 4630647456 diff --git a/model-00080-of-00086.safetensors b/model-00080-of-00086.safetensors old mode 100644 new mode 100755 index 8a2adb3f52c8c1d77b1cad2d0ef69adb50dd7d63..cbe8b12a0097959de01cf94981042579ded766ec --- a/model-00080-of-00086.safetensors +++ b/model-00080-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7335ec5fdbfe67d5af062e7bf1272e06cbef2a01e9de3704ee0d51f8eced72f3 -size 4932570184 +oid sha256:3011feca5cc7c68d91ec780fa136fd8425fee4b34ab1ef9889844e0a9767cb23 +size 4932570384 diff --git a/model-00081-of-00086.safetensors b/model-00081-of-00086.safetensors old mode 100644 new mode 100755 index dce185bda0314d29fc3b0aef52826fa1af117619..7e9be00151a97093fa0e7f673af23af62d4f09f9 --- a/model-00081-of-00086.safetensors +++ b/model-00081-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:73598e3741c7ce7724656903194866eb23790ff6d3d3ed2766c405a67d8a8656 -size 4630647056 +oid sha256:4720d070265a2801051da939ed96cd034e8a1b6025419dd0d5c019574a41b808 +size 4630647456 diff --git a/model-00082-of-00086.safetensors b/model-00082-of-00086.safetensors old mode 100644 new mode 100755 index 397d9c399e0a452c1b6a9795ac103b0c930e86aa..c2ef11cab4b0fbad0d3f0674b0509d54edb35ce8 --- a/model-00082-of-00086.safetensors +++ b/model-00082-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:faacf479050fa5c8482737c0d4ff27c3a3c1bb3e2095247fa58488d8e046efb9 -size 4932570184 +oid sha256:3d89db12ff6cd43cc298b78e871909cde2225e26c1125568138c6d65cedb474d +size 4932570384 diff --git a/model-00083-of-00086.safetensors b/model-00083-of-00086.safetensors old mode 100644 new mode 100755 index 1066aa573c21c2dc4af0fb34cffd95bc55184db3..9f006eefe1a1fa57ef5c3c412ce4af9fb016a7bf --- a/model-00083-of-00086.safetensors +++ b/model-00083-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d1a7e31b497c3cb5ed185c5b10f129c8c330b3b1c89e5598cdd5392d9c54bcbd -size 4630647056 +oid sha256:12d8445e964cf6e06eed9f842daaa23f0c98d6abd9c1eba1ca08be1ee235afba +size 4630647456 diff --git a/model-00084-of-00086.safetensors b/model-00084-of-00086.safetensors old mode 100644 new mode 100755 index 3f9c5b13c1cf93964c14dff3a9e9d98caf7f9e24..0c4317ee58299cc6bca3075a0759b178eb2afa82 --- a/model-00084-of-00086.safetensors +++ b/model-00084-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:e84e77c143f9e257d8d6429abb2dd6de4cfb37658ed75d2966ba06aaea47c4d7 -size 4932570184 +oid sha256:f0814a549438d670fe66fc13c721c28c51dfcaa33a48beff3cefed47a5190894 +size 4932570384 diff --git a/model-00085-of-00086.safetensors b/model-00085-of-00086.safetensors old mode 100644 new mode 100755 index 5700a2d9b765d8c4318bdfd020acb65bffcc19d7..cdb48bee6759222ff893f875dac9aece2b42c147 --- a/model-00085-of-00086.safetensors +++ b/model-00085-of-00086.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8fd4a8e97ef72ce896c3347619a7599f9cf4712d32ea9b466989bab6d91d8eff -size 4060253248 +oid sha256:5d461847ccb77cc3facde83129fdbdde0eaff62b2c2164272caec26e7f0477a6 +size 4060253448 diff --git a/model-00086-of-00086.safetensors b/model-00086-of-00086.safetensors old mode 100644 new mode 100755 diff --git a/model.safetensors.index.json b/model.safetensors.index.json old mode 100644 new mode 100755 index a9f556dee30b21d05d572d14cfdb00ce41a1816a..c4e5b2a60c480d09bf296984b08adf33811854e0 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -1,6 +1,6 @@ { "metadata": { - "total_size": 410060233616 + "total_size": 410060234624 }, "weight_map": { "lm_head.weight": "model-00086-of-00086.safetensors", @@ -17,6 +17,7 @@ "model.layers.0.mlp.up_proj.weight_scale": "model-00002-of-00086.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00086.safetensors", "model.layers.0.self_attn.k_proj.input_scale": "model-00001-of-00086.safetensors", + "model.layers.0.self_attn.k_proj.k_scale": "model-00001-of-00086.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00086.safetensors", "model.layers.0.self_attn.k_proj.weight_scale": "model-00001-of-00086.safetensors", "model.layers.0.self_attn.o_proj.input_scale": "model-00001-of-00086.safetensors", @@ -26,6 +27,7 @@ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00086.safetensors", "model.layers.0.self_attn.q_proj.weight_scale": "model-00001-of-00086.safetensors", "model.layers.0.self_attn.v_proj.input_scale": "model-00001-of-00086.safetensors", + "model.layers.0.self_attn.v_proj.v_scale": "model-00001-of-00086.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00086.safetensors", "model.layers.0.self_attn.v_proj.weight_scale": "model-00001-of-00086.safetensors", "model.layers.1.input_layernorm.weight": "model-00003-of-00086.safetensors", @@ -40,6 +42,7 @@ "model.layers.1.mlp.up_proj.weight_scale": "model-00002-of-00086.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00086.safetensors", "model.layers.1.self_attn.k_proj.input_scale": "model-00002-of-00086.safetensors", + "model.layers.1.self_attn.k_proj.k_scale": "model-00002-of-00086.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00086.safetensors", "model.layers.1.self_attn.k_proj.weight_scale": "model-00002-of-00086.safetensors", "model.layers.1.self_attn.o_proj.input_scale": "model-00002-of-00086.safetensors", @@ -49,6 +52,7 @@ "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00086.safetensors", "model.layers.1.self_attn.q_proj.weight_scale": "model-00002-of-00086.safetensors", "model.layers.1.self_attn.v_proj.input_scale": "model-00002-of-00086.safetensors", + "model.layers.1.self_attn.v_proj.v_scale": "model-00002-of-00086.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00086.safetensors", "model.layers.1.self_attn.v_proj.weight_scale": "model-00002-of-00086.safetensors", "model.layers.10.input_layernorm.weight": "model-00009-of-00086.safetensors", @@ -63,6 +67,7 @@ "model.layers.10.mlp.up_proj.weight_scale": "model-00008-of-00086.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00009-of-00086.safetensors", "model.layers.10.self_attn.k_proj.input_scale": "model-00008-of-00086.safetensors", + "model.layers.10.self_attn.k_proj.k_scale": "model-00008-of-00086.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00008-of-00086.safetensors", "model.layers.10.self_attn.k_proj.weight_scale": "model-00008-of-00086.safetensors", "model.layers.10.self_attn.o_proj.input_scale": "model-00008-of-00086.safetensors", @@ -72,6 +77,7 @@ "model.layers.10.self_attn.q_proj.weight": "model-00008-of-00086.safetensors", "model.layers.10.self_attn.q_proj.weight_scale": "model-00008-of-00086.safetensors", "model.layers.10.self_attn.v_proj.input_scale": "model-00008-of-00086.safetensors", + "model.layers.10.self_attn.v_proj.v_scale": "model-00008-of-00086.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00008-of-00086.safetensors", "model.layers.10.self_attn.v_proj.weight_scale": "model-00008-of-00086.safetensors", "model.layers.100.input_layernorm.weight": "model-00069-of-00086.safetensors", @@ -86,6 +92,7 @@ "model.layers.100.mlp.up_proj.weight_scale": "model-00068-of-00086.safetensors", "model.layers.100.post_attention_layernorm.weight": "model-00069-of-00086.safetensors", "model.layers.100.self_attn.k_proj.input_scale": "model-00068-of-00086.safetensors", + "model.layers.100.self_attn.k_proj.k_scale": "model-00068-of-00086.safetensors", "model.layers.100.self_attn.k_proj.weight": "model-00068-of-00086.safetensors", "model.layers.100.self_attn.k_proj.weight_scale": "model-00068-of-00086.safetensors", "model.layers.100.self_attn.o_proj.input_scale": "model-00068-of-00086.safetensors", @@ -95,6 +102,7 @@ "model.layers.100.self_attn.q_proj.weight": "model-00068-of-00086.safetensors", "model.layers.100.self_attn.q_proj.weight_scale": "model-00068-of-00086.safetensors", "model.layers.100.self_attn.v_proj.input_scale": "model-00068-of-00086.safetensors", + "model.layers.100.self_attn.v_proj.v_scale": "model-00068-of-00086.safetensors", "model.layers.100.self_attn.v_proj.weight": "model-00068-of-00086.safetensors", "model.layers.100.self_attn.v_proj.weight_scale": "model-00068-of-00086.safetensors", "model.layers.101.input_layernorm.weight": "model-00069-of-00086.safetensors", @@ -109,6 +117,7 @@ "model.layers.101.mlp.up_proj.weight_scale": "model-00069-of-00086.safetensors", "model.layers.101.post_attention_layernorm.weight": "model-00069-of-00086.safetensors", "model.layers.101.self_attn.k_proj.input_scale": "model-00069-of-00086.safetensors", + "model.layers.101.self_attn.k_proj.k_scale": "model-00069-of-00086.safetensors", "model.layers.101.self_attn.k_proj.weight": "model-00069-of-00086.safetensors", "model.layers.101.self_attn.k_proj.weight_scale": "model-00069-of-00086.safetensors", "model.layers.101.self_attn.o_proj.input_scale": "model-00069-of-00086.safetensors", @@ -118,6 +127,7 @@ "model.layers.101.self_attn.q_proj.weight": "model-00069-of-00086.safetensors", "model.layers.101.self_attn.q_proj.weight_scale": "model-00069-of-00086.safetensors", "model.layers.101.self_attn.v_proj.input_scale": "model-00069-of-00086.safetensors", + "model.layers.101.self_attn.v_proj.v_scale": "model-00069-of-00086.safetensors", "model.layers.101.self_attn.v_proj.weight": "model-00069-of-00086.safetensors", "model.layers.101.self_attn.v_proj.weight_scale": "model-00069-of-00086.safetensors", "model.layers.102.input_layernorm.weight": "model-00070-of-00086.safetensors", @@ -132,6 +142,7 @@ "model.layers.102.mlp.up_proj.weight_scale": "model-00070-of-00086.safetensors", "model.layers.102.post_attention_layernorm.weight": "model-00070-of-00086.safetensors", "model.layers.102.self_attn.k_proj.input_scale": "model-00069-of-00086.safetensors", + "model.layers.102.self_attn.k_proj.k_scale": "model-00069-of-00086.safetensors", "model.layers.102.self_attn.k_proj.weight": "model-00069-of-00086.safetensors", "model.layers.102.self_attn.k_proj.weight_scale": "model-00069-of-00086.safetensors", "model.layers.102.self_attn.o_proj.input_scale": "model-00069-of-00086.safetensors", @@ -141,6 +152,7 @@ "model.layers.102.self_attn.q_proj.weight": "model-00069-of-00086.safetensors", "model.layers.102.self_attn.q_proj.weight_scale": "model-00069-of-00086.safetensors", "model.layers.102.self_attn.v_proj.input_scale": "model-00069-of-00086.safetensors", + "model.layers.102.self_attn.v_proj.v_scale": "model-00069-of-00086.safetensors", "model.layers.102.self_attn.v_proj.weight": "model-00069-of-00086.safetensors", "model.layers.102.self_attn.v_proj.weight_scale": "model-00069-of-00086.safetensors", "model.layers.103.input_layernorm.weight": "model-00071-of-00086.safetensors", @@ -155,6 +167,7 @@ "model.layers.103.mlp.up_proj.weight_scale": "model-00070-of-00086.safetensors", "model.layers.103.post_attention_layernorm.weight": "model-00071-of-00086.safetensors", "model.layers.103.self_attn.k_proj.input_scale": "model-00070-of-00086.safetensors", + "model.layers.103.self_attn.k_proj.k_scale": "model-00070-of-00086.safetensors", "model.layers.103.self_attn.k_proj.weight": "model-00070-of-00086.safetensors", "model.layers.103.self_attn.k_proj.weight_scale": "model-00070-of-00086.safetensors", "model.layers.103.self_attn.o_proj.input_scale": "model-00070-of-00086.safetensors", @@ -164,6 +177,7 @@ "model.layers.103.self_attn.q_proj.weight": "model-00070-of-00086.safetensors", "model.layers.103.self_attn.q_proj.weight_scale": "model-00070-of-00086.safetensors", "model.layers.103.self_attn.v_proj.input_scale": "model-00070-of-00086.safetensors", + "model.layers.103.self_attn.v_proj.v_scale": "model-00070-of-00086.safetensors", "model.layers.103.self_attn.v_proj.weight": "model-00070-of-00086.safetensors", "model.layers.103.self_attn.v_proj.weight_scale": "model-00070-of-00086.safetensors", "model.layers.104.input_layernorm.weight": "model-00071-of-00086.safetensors", @@ -178,6 +192,7 @@ "model.layers.104.mlp.up_proj.weight_scale": "model-00071-of-00086.safetensors", "model.layers.104.post_attention_layernorm.weight": "model-00071-of-00086.safetensors", "model.layers.104.self_attn.k_proj.input_scale": "model-00071-of-00086.safetensors", + "model.layers.104.self_attn.k_proj.k_scale": "model-00071-of-00086.safetensors", "model.layers.104.self_attn.k_proj.weight": "model-00071-of-00086.safetensors", "model.layers.104.self_attn.k_proj.weight_scale": "model-00071-of-00086.safetensors", "model.layers.104.self_attn.o_proj.input_scale": "model-00071-of-00086.safetensors", @@ -187,6 +202,7 @@ "model.layers.104.self_attn.q_proj.weight": "model-00071-of-00086.safetensors", "model.layers.104.self_attn.q_proj.weight_scale": "model-00071-of-00086.safetensors", "model.layers.104.self_attn.v_proj.input_scale": "model-00071-of-00086.safetensors", + "model.layers.104.self_attn.v_proj.v_scale": "model-00071-of-00086.safetensors", "model.layers.104.self_attn.v_proj.weight": "model-00071-of-00086.safetensors", "model.layers.104.self_attn.v_proj.weight_scale": "model-00071-of-00086.safetensors", "model.layers.105.input_layernorm.weight": "model-00072-of-00086.safetensors", @@ -201,6 +217,7 @@ "model.layers.105.mlp.up_proj.weight_scale": "model-00072-of-00086.safetensors", "model.layers.105.post_attention_layernorm.weight": "model-00072-of-00086.safetensors", "model.layers.105.self_attn.k_proj.input_scale": "model-00071-of-00086.safetensors", + "model.layers.105.self_attn.k_proj.k_scale": "model-00071-of-00086.safetensors", "model.layers.105.self_attn.k_proj.weight": "model-00071-of-00086.safetensors", "model.layers.105.self_attn.k_proj.weight_scale": "model-00071-of-00086.safetensors", "model.layers.105.self_attn.o_proj.input_scale": "model-00071-of-00086.safetensors", @@ -210,6 +227,7 @@ "model.layers.105.self_attn.q_proj.weight": "model-00071-of-00086.safetensors", "model.layers.105.self_attn.q_proj.weight_scale": "model-00071-of-00086.safetensors", "model.layers.105.self_attn.v_proj.input_scale": "model-00071-of-00086.safetensors", + "model.layers.105.self_attn.v_proj.v_scale": "model-00071-of-00086.safetensors", "model.layers.105.self_attn.v_proj.weight": "model-00071-of-00086.safetensors", "model.layers.105.self_attn.v_proj.weight_scale": "model-00071-of-00086.safetensors", "model.layers.106.input_layernorm.weight": "model-00073-of-00086.safetensors", @@ -224,6 +242,7 @@ "model.layers.106.mlp.up_proj.weight_scale": "model-00072-of-00086.safetensors", "model.layers.106.post_attention_layernorm.weight": "model-00073-of-00086.safetensors", "model.layers.106.self_attn.k_proj.input_scale": "model-00072-of-00086.safetensors", + "model.layers.106.self_attn.k_proj.k_scale": "model-00072-of-00086.safetensors", "model.layers.106.self_attn.k_proj.weight": "model-00072-of-00086.safetensors", "model.layers.106.self_attn.k_proj.weight_scale": "model-00072-of-00086.safetensors", "model.layers.106.self_attn.o_proj.input_scale": "model-00072-of-00086.safetensors", @@ -233,6 +252,7 @@ "model.layers.106.self_attn.q_proj.weight": "model-00072-of-00086.safetensors", "model.layers.106.self_attn.q_proj.weight_scale": "model-00072-of-00086.safetensors", "model.layers.106.self_attn.v_proj.input_scale": "model-00072-of-00086.safetensors", + "model.layers.106.self_attn.v_proj.v_scale": "model-00072-of-00086.safetensors", "model.layers.106.self_attn.v_proj.weight": "model-00072-of-00086.safetensors", "model.layers.106.self_attn.v_proj.weight_scale": "model-00072-of-00086.safetensors", "model.layers.107.input_layernorm.weight": "model-00073-of-00086.safetensors", @@ -247,6 +267,7 @@ "model.layers.107.mlp.up_proj.weight_scale": "model-00073-of-00086.safetensors", "model.layers.107.post_attention_layernorm.weight": "model-00073-of-00086.safetensors", "model.layers.107.self_attn.k_proj.input_scale": "model-00073-of-00086.safetensors", + "model.layers.107.self_attn.k_proj.k_scale": "model-00073-of-00086.safetensors", "model.layers.107.self_attn.k_proj.weight": "model-00073-of-00086.safetensors", "model.layers.107.self_attn.k_proj.weight_scale": "model-00073-of-00086.safetensors", "model.layers.107.self_attn.o_proj.input_scale": "model-00073-of-00086.safetensors", @@ -256,6 +277,7 @@ "model.layers.107.self_attn.q_proj.weight": "model-00073-of-00086.safetensors", "model.layers.107.self_attn.q_proj.weight_scale": "model-00073-of-00086.safetensors", "model.layers.107.self_attn.v_proj.input_scale": "model-00073-of-00086.safetensors", + "model.layers.107.self_attn.v_proj.v_scale": "model-00073-of-00086.safetensors", "model.layers.107.self_attn.v_proj.weight": "model-00073-of-00086.safetensors", "model.layers.107.self_attn.v_proj.weight_scale": "model-00073-of-00086.safetensors", "model.layers.108.input_layernorm.weight": "model-00074-of-00086.safetensors", @@ -270,6 +292,7 @@ "model.layers.108.mlp.up_proj.weight_scale": "model-00074-of-00086.safetensors", "model.layers.108.post_attention_layernorm.weight": "model-00074-of-00086.safetensors", "model.layers.108.self_attn.k_proj.input_scale": "model-00073-of-00086.safetensors", + "model.layers.108.self_attn.k_proj.k_scale": "model-00073-of-00086.safetensors", "model.layers.108.self_attn.k_proj.weight": "model-00073-of-00086.safetensors", "model.layers.108.self_attn.k_proj.weight_scale": "model-00073-of-00086.safetensors", "model.layers.108.self_attn.o_proj.input_scale": "model-00073-of-00086.safetensors", @@ -279,6 +302,7 @@ "model.layers.108.self_attn.q_proj.weight": "model-00073-of-00086.safetensors", "model.layers.108.self_attn.q_proj.weight_scale": "model-00073-of-00086.safetensors", "model.layers.108.self_attn.v_proj.input_scale": "model-00073-of-00086.safetensors", + "model.layers.108.self_attn.v_proj.v_scale": "model-00073-of-00086.safetensors", "model.layers.108.self_attn.v_proj.weight": "model-00073-of-00086.safetensors", "model.layers.108.self_attn.v_proj.weight_scale": "model-00073-of-00086.safetensors", "model.layers.109.input_layernorm.weight": "model-00075-of-00086.safetensors", @@ -293,6 +317,7 @@ "model.layers.109.mlp.up_proj.weight_scale": "model-00074-of-00086.safetensors", "model.layers.109.post_attention_layernorm.weight": "model-00075-of-00086.safetensors", "model.layers.109.self_attn.k_proj.input_scale": "model-00074-of-00086.safetensors", + "model.layers.109.self_attn.k_proj.k_scale": "model-00074-of-00086.safetensors", "model.layers.109.self_attn.k_proj.weight": "model-00074-of-00086.safetensors", "model.layers.109.self_attn.k_proj.weight_scale": "model-00074-of-00086.safetensors", "model.layers.109.self_attn.o_proj.input_scale": "model-00074-of-00086.safetensors", @@ -302,6 +327,7 @@ "model.layers.109.self_attn.q_proj.weight": "model-00074-of-00086.safetensors", "model.layers.109.self_attn.q_proj.weight_scale": "model-00074-of-00086.safetensors", "model.layers.109.self_attn.v_proj.input_scale": "model-00074-of-00086.safetensors", + "model.layers.109.self_attn.v_proj.v_scale": "model-00074-of-00086.safetensors", "model.layers.109.self_attn.v_proj.weight": "model-00074-of-00086.safetensors", "model.layers.109.self_attn.v_proj.weight_scale": "model-00074-of-00086.safetensors", "model.layers.11.input_layernorm.weight": "model-00009-of-00086.safetensors", @@ -316,6 +342,7 @@ "model.layers.11.mlp.up_proj.weight_scale": "model-00009-of-00086.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00009-of-00086.safetensors", "model.layers.11.self_attn.k_proj.input_scale": "model-00009-of-00086.safetensors", + "model.layers.11.self_attn.k_proj.k_scale": "model-00009-of-00086.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00009-of-00086.safetensors", "model.layers.11.self_attn.k_proj.weight_scale": "model-00009-of-00086.safetensors", "model.layers.11.self_attn.o_proj.input_scale": "model-00009-of-00086.safetensors", @@ -325,6 +352,7 @@ "model.layers.11.self_attn.q_proj.weight": "model-00009-of-00086.safetensors", "model.layers.11.self_attn.q_proj.weight_scale": "model-00009-of-00086.safetensors", "model.layers.11.self_attn.v_proj.input_scale": "model-00009-of-00086.safetensors", + "model.layers.11.self_attn.v_proj.v_scale": "model-00009-of-00086.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00009-of-00086.safetensors", "model.layers.11.self_attn.v_proj.weight_scale": "model-00009-of-00086.safetensors", "model.layers.110.input_layernorm.weight": "model-00075-of-00086.safetensors", @@ -339,6 +367,7 @@ "model.layers.110.mlp.up_proj.weight_scale": "model-00075-of-00086.safetensors", "model.layers.110.post_attention_layernorm.weight": "model-00075-of-00086.safetensors", "model.layers.110.self_attn.k_proj.input_scale": "model-00075-of-00086.safetensors", + "model.layers.110.self_attn.k_proj.k_scale": "model-00075-of-00086.safetensors", "model.layers.110.self_attn.k_proj.weight": "model-00075-of-00086.safetensors", "model.layers.110.self_attn.k_proj.weight_scale": "model-00075-of-00086.safetensors", "model.layers.110.self_attn.o_proj.input_scale": "model-00075-of-00086.safetensors", @@ -348,6 +377,7 @@ "model.layers.110.self_attn.q_proj.weight": "model-00075-of-00086.safetensors", "model.layers.110.self_attn.q_proj.weight_scale": "model-00075-of-00086.safetensors", "model.layers.110.self_attn.v_proj.input_scale": "model-00075-of-00086.safetensors", + "model.layers.110.self_attn.v_proj.v_scale": "model-00075-of-00086.safetensors", "model.layers.110.self_attn.v_proj.weight": "model-00075-of-00086.safetensors", "model.layers.110.self_attn.v_proj.weight_scale": "model-00075-of-00086.safetensors", "model.layers.111.input_layernorm.weight": "model-00076-of-00086.safetensors", @@ -362,6 +392,7 @@ "model.layers.111.mlp.up_proj.weight_scale": "model-00076-of-00086.safetensors", "model.layers.111.post_attention_layernorm.weight": "model-00076-of-00086.safetensors", "model.layers.111.self_attn.k_proj.input_scale": "model-00075-of-00086.safetensors", + "model.layers.111.self_attn.k_proj.k_scale": "model-00075-of-00086.safetensors", "model.layers.111.self_attn.k_proj.weight": "model-00075-of-00086.safetensors", "model.layers.111.self_attn.k_proj.weight_scale": "model-00075-of-00086.safetensors", "model.layers.111.self_attn.o_proj.input_scale": "model-00075-of-00086.safetensors", @@ -371,6 +402,7 @@ "model.layers.111.self_attn.q_proj.weight": "model-00075-of-00086.safetensors", "model.layers.111.self_attn.q_proj.weight_scale": "model-00075-of-00086.safetensors", "model.layers.111.self_attn.v_proj.input_scale": "model-00075-of-00086.safetensors", + "model.layers.111.self_attn.v_proj.v_scale": "model-00075-of-00086.safetensors", "model.layers.111.self_attn.v_proj.weight": "model-00075-of-00086.safetensors", "model.layers.111.self_attn.v_proj.weight_scale": "model-00075-of-00086.safetensors", "model.layers.112.input_layernorm.weight": "model-00077-of-00086.safetensors", @@ -385,6 +417,7 @@ "model.layers.112.mlp.up_proj.weight_scale": "model-00076-of-00086.safetensors", "model.layers.112.post_attention_layernorm.weight": "model-00077-of-00086.safetensors", "model.layers.112.self_attn.k_proj.input_scale": "model-00076-of-00086.safetensors", + "model.layers.112.self_attn.k_proj.k_scale": "model-00076-of-00086.safetensors", "model.layers.112.self_attn.k_proj.weight": "model-00076-of-00086.safetensors", "model.layers.112.self_attn.k_proj.weight_scale": "model-00076-of-00086.safetensors", "model.layers.112.self_attn.o_proj.input_scale": "model-00076-of-00086.safetensors", @@ -394,6 +427,7 @@ "model.layers.112.self_attn.q_proj.weight": "model-00076-of-00086.safetensors", "model.layers.112.self_attn.q_proj.weight_scale": "model-00076-of-00086.safetensors", "model.layers.112.self_attn.v_proj.input_scale": "model-00076-of-00086.safetensors", + "model.layers.112.self_attn.v_proj.v_scale": "model-00076-of-00086.safetensors", "model.layers.112.self_attn.v_proj.weight": "model-00076-of-00086.safetensors", "model.layers.112.self_attn.v_proj.weight_scale": "model-00076-of-00086.safetensors", "model.layers.113.input_layernorm.weight": "model-00077-of-00086.safetensors", @@ -408,6 +442,7 @@ "model.layers.113.mlp.up_proj.weight_scale": "model-00077-of-00086.safetensors", "model.layers.113.post_attention_layernorm.weight": "model-00077-of-00086.safetensors", "model.layers.113.self_attn.k_proj.input_scale": "model-00077-of-00086.safetensors", + "model.layers.113.self_attn.k_proj.k_scale": "model-00077-of-00086.safetensors", "model.layers.113.self_attn.k_proj.weight": "model-00077-of-00086.safetensors", "model.layers.113.self_attn.k_proj.weight_scale": "model-00077-of-00086.safetensors", "model.layers.113.self_attn.o_proj.input_scale": "model-00077-of-00086.safetensors", @@ -417,6 +452,7 @@ "model.layers.113.self_attn.q_proj.weight": "model-00077-of-00086.safetensors", "model.layers.113.self_attn.q_proj.weight_scale": "model-00077-of-00086.safetensors", "model.layers.113.self_attn.v_proj.input_scale": "model-00077-of-00086.safetensors", + "model.layers.113.self_attn.v_proj.v_scale": "model-00077-of-00086.safetensors", "model.layers.113.self_attn.v_proj.weight": "model-00077-of-00086.safetensors", "model.layers.113.self_attn.v_proj.weight_scale": "model-00077-of-00086.safetensors", "model.layers.114.input_layernorm.weight": "model-00078-of-00086.safetensors", @@ -431,6 +467,7 @@ "model.layers.114.mlp.up_proj.weight_scale": "model-00078-of-00086.safetensors", "model.layers.114.post_attention_layernorm.weight": "model-00078-of-00086.safetensors", "model.layers.114.self_attn.k_proj.input_scale": "model-00077-of-00086.safetensors", + "model.layers.114.self_attn.k_proj.k_scale": "model-00077-of-00086.safetensors", "model.layers.114.self_attn.k_proj.weight": "model-00077-of-00086.safetensors", "model.layers.114.self_attn.k_proj.weight_scale": "model-00077-of-00086.safetensors", "model.layers.114.self_attn.o_proj.input_scale": "model-00077-of-00086.safetensors", @@ -440,6 +477,7 @@ "model.layers.114.self_attn.q_proj.weight": "model-00077-of-00086.safetensors", "model.layers.114.self_attn.q_proj.weight_scale": "model-00077-of-00086.safetensors", "model.layers.114.self_attn.v_proj.input_scale": "model-00077-of-00086.safetensors", + "model.layers.114.self_attn.v_proj.v_scale": "model-00077-of-00086.safetensors", "model.layers.114.self_attn.v_proj.weight": "model-00077-of-00086.safetensors", "model.layers.114.self_attn.v_proj.weight_scale": "model-00077-of-00086.safetensors", "model.layers.115.input_layernorm.weight": "model-00079-of-00086.safetensors", @@ -454,6 +492,7 @@ "model.layers.115.mlp.up_proj.weight_scale": "model-00078-of-00086.safetensors", "model.layers.115.post_attention_layernorm.weight": "model-00079-of-00086.safetensors", "model.layers.115.self_attn.k_proj.input_scale": "model-00078-of-00086.safetensors", + "model.layers.115.self_attn.k_proj.k_scale": "model-00078-of-00086.safetensors", "model.layers.115.self_attn.k_proj.weight": "model-00078-of-00086.safetensors", "model.layers.115.self_attn.k_proj.weight_scale": "model-00078-of-00086.safetensors", "model.layers.115.self_attn.o_proj.input_scale": "model-00078-of-00086.safetensors", @@ -463,6 +502,7 @@ "model.layers.115.self_attn.q_proj.weight": "model-00078-of-00086.safetensors", "model.layers.115.self_attn.q_proj.weight_scale": "model-00078-of-00086.safetensors", "model.layers.115.self_attn.v_proj.input_scale": "model-00078-of-00086.safetensors", + "model.layers.115.self_attn.v_proj.v_scale": "model-00078-of-00086.safetensors", "model.layers.115.self_attn.v_proj.weight": "model-00078-of-00086.safetensors", "model.layers.115.self_attn.v_proj.weight_scale": "model-00078-of-00086.safetensors", "model.layers.116.input_layernorm.weight": "model-00079-of-00086.safetensors", @@ -477,6 +517,7 @@ "model.layers.116.mlp.up_proj.weight_scale": "model-00079-of-00086.safetensors", "model.layers.116.post_attention_layernorm.weight": "model-00079-of-00086.safetensors", "model.layers.116.self_attn.k_proj.input_scale": "model-00079-of-00086.safetensors", + "model.layers.116.self_attn.k_proj.k_scale": "model-00079-of-00086.safetensors", "model.layers.116.self_attn.k_proj.weight": "model-00079-of-00086.safetensors", "model.layers.116.self_attn.k_proj.weight_scale": "model-00079-of-00086.safetensors", "model.layers.116.self_attn.o_proj.input_scale": "model-00079-of-00086.safetensors", @@ -486,6 +527,7 @@ "model.layers.116.self_attn.q_proj.weight": "model-00079-of-00086.safetensors", "model.layers.116.self_attn.q_proj.weight_scale": "model-00079-of-00086.safetensors", "model.layers.116.self_attn.v_proj.input_scale": "model-00079-of-00086.safetensors", + "model.layers.116.self_attn.v_proj.v_scale": "model-00079-of-00086.safetensors", "model.layers.116.self_attn.v_proj.weight": "model-00079-of-00086.safetensors", "model.layers.116.self_attn.v_proj.weight_scale": "model-00079-of-00086.safetensors", "model.layers.117.input_layernorm.weight": "model-00080-of-00086.safetensors", @@ -500,6 +542,7 @@ "model.layers.117.mlp.up_proj.weight_scale": "model-00080-of-00086.safetensors", "model.layers.117.post_attention_layernorm.weight": "model-00080-of-00086.safetensors", "model.layers.117.self_attn.k_proj.input_scale": "model-00079-of-00086.safetensors", + "model.layers.117.self_attn.k_proj.k_scale": "model-00079-of-00086.safetensors", "model.layers.117.self_attn.k_proj.weight": "model-00079-of-00086.safetensors", "model.layers.117.self_attn.k_proj.weight_scale": "model-00079-of-00086.safetensors", "model.layers.117.self_attn.o_proj.input_scale": "model-00079-of-00086.safetensors", @@ -509,6 +552,7 @@ "model.layers.117.self_attn.q_proj.weight": "model-00079-of-00086.safetensors", "model.layers.117.self_attn.q_proj.weight_scale": "model-00079-of-00086.safetensors", "model.layers.117.self_attn.v_proj.input_scale": "model-00079-of-00086.safetensors", + "model.layers.117.self_attn.v_proj.v_scale": "model-00079-of-00086.safetensors", "model.layers.117.self_attn.v_proj.weight": "model-00079-of-00086.safetensors", "model.layers.117.self_attn.v_proj.weight_scale": "model-00079-of-00086.safetensors", "model.layers.118.input_layernorm.weight": "model-00081-of-00086.safetensors", @@ -523,6 +567,7 @@ "model.layers.118.mlp.up_proj.weight_scale": "model-00080-of-00086.safetensors", "model.layers.118.post_attention_layernorm.weight": "model-00081-of-00086.safetensors", "model.layers.118.self_attn.k_proj.input_scale": "model-00080-of-00086.safetensors", + "model.layers.118.self_attn.k_proj.k_scale": "model-00080-of-00086.safetensors", "model.layers.118.self_attn.k_proj.weight": "model-00080-of-00086.safetensors", "model.layers.118.self_attn.k_proj.weight_scale": "model-00080-of-00086.safetensors", "model.layers.118.self_attn.o_proj.input_scale": "model-00080-of-00086.safetensors", @@ -532,6 +577,7 @@ "model.layers.118.self_attn.q_proj.weight": "model-00080-of-00086.safetensors", "model.layers.118.self_attn.q_proj.weight_scale": "model-00080-of-00086.safetensors", "model.layers.118.self_attn.v_proj.input_scale": "model-00080-of-00086.safetensors", + "model.layers.118.self_attn.v_proj.v_scale": "model-00080-of-00086.safetensors", "model.layers.118.self_attn.v_proj.weight": "model-00080-of-00086.safetensors", "model.layers.118.self_attn.v_proj.weight_scale": "model-00080-of-00086.safetensors", "model.layers.119.input_layernorm.weight": "model-00081-of-00086.safetensors", @@ -546,6 +592,7 @@ "model.layers.119.mlp.up_proj.weight_scale": "model-00081-of-00086.safetensors", "model.layers.119.post_attention_layernorm.weight": "model-00081-of-00086.safetensors", "model.layers.119.self_attn.k_proj.input_scale": "model-00081-of-00086.safetensors", + "model.layers.119.self_attn.k_proj.k_scale": "model-00081-of-00086.safetensors", "model.layers.119.self_attn.k_proj.weight": "model-00081-of-00086.safetensors", "model.layers.119.self_attn.k_proj.weight_scale": "model-00081-of-00086.safetensors", "model.layers.119.self_attn.o_proj.input_scale": "model-00081-of-00086.safetensors", @@ -555,6 +602,7 @@ "model.layers.119.self_attn.q_proj.weight": "model-00081-of-00086.safetensors", "model.layers.119.self_attn.q_proj.weight_scale": "model-00081-of-00086.safetensors", "model.layers.119.self_attn.v_proj.input_scale": "model-00081-of-00086.safetensors", + "model.layers.119.self_attn.v_proj.v_scale": "model-00081-of-00086.safetensors", "model.layers.119.self_attn.v_proj.weight": "model-00081-of-00086.safetensors", "model.layers.119.self_attn.v_proj.weight_scale": "model-00081-of-00086.safetensors", "model.layers.12.input_layernorm.weight": "model-00010-of-00086.safetensors", @@ -569,6 +617,7 @@ "model.layers.12.mlp.up_proj.weight_scale": "model-00010-of-00086.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00010-of-00086.safetensors", "model.layers.12.self_attn.k_proj.input_scale": "model-00009-of-00086.safetensors", + "model.layers.12.self_attn.k_proj.k_scale": "model-00009-of-00086.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00009-of-00086.safetensors", "model.layers.12.self_attn.k_proj.weight_scale": "model-00009-of-00086.safetensors", "model.layers.12.self_attn.o_proj.input_scale": "model-00009-of-00086.safetensors", @@ -578,6 +627,7 @@ "model.layers.12.self_attn.q_proj.weight": "model-00009-of-00086.safetensors", "model.layers.12.self_attn.q_proj.weight_scale": "model-00009-of-00086.safetensors", "model.layers.12.self_attn.v_proj.input_scale": "model-00009-of-00086.safetensors", + "model.layers.12.self_attn.v_proj.v_scale": "model-00009-of-00086.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00009-of-00086.safetensors", "model.layers.12.self_attn.v_proj.weight_scale": "model-00009-of-00086.safetensors", "model.layers.120.input_layernorm.weight": "model-00082-of-00086.safetensors", @@ -592,6 +642,7 @@ "model.layers.120.mlp.up_proj.weight_scale": "model-00082-of-00086.safetensors", "model.layers.120.post_attention_layernorm.weight": "model-00082-of-00086.safetensors", "model.layers.120.self_attn.k_proj.input_scale": "model-00081-of-00086.safetensors", + "model.layers.120.self_attn.k_proj.k_scale": "model-00081-of-00086.safetensors", "model.layers.120.self_attn.k_proj.weight": "model-00081-of-00086.safetensors", "model.layers.120.self_attn.k_proj.weight_scale": "model-00081-of-00086.safetensors", "model.layers.120.self_attn.o_proj.input_scale": "model-00081-of-00086.safetensors", @@ -601,6 +652,7 @@ "model.layers.120.self_attn.q_proj.weight": "model-00081-of-00086.safetensors", "model.layers.120.self_attn.q_proj.weight_scale": "model-00081-of-00086.safetensors", "model.layers.120.self_attn.v_proj.input_scale": "model-00081-of-00086.safetensors", + "model.layers.120.self_attn.v_proj.v_scale": "model-00081-of-00086.safetensors", "model.layers.120.self_attn.v_proj.weight": "model-00081-of-00086.safetensors", "model.layers.120.self_attn.v_proj.weight_scale": "model-00081-of-00086.safetensors", "model.layers.121.input_layernorm.weight": "model-00083-of-00086.safetensors", @@ -615,6 +667,7 @@ "model.layers.121.mlp.up_proj.weight_scale": "model-00082-of-00086.safetensors", "model.layers.121.post_attention_layernorm.weight": "model-00083-of-00086.safetensors", "model.layers.121.self_attn.k_proj.input_scale": "model-00082-of-00086.safetensors", + "model.layers.121.self_attn.k_proj.k_scale": "model-00082-of-00086.safetensors", "model.layers.121.self_attn.k_proj.weight": "model-00082-of-00086.safetensors", "model.layers.121.self_attn.k_proj.weight_scale": "model-00082-of-00086.safetensors", "model.layers.121.self_attn.o_proj.input_scale": "model-00082-of-00086.safetensors", @@ -624,6 +677,7 @@ "model.layers.121.self_attn.q_proj.weight": "model-00082-of-00086.safetensors", "model.layers.121.self_attn.q_proj.weight_scale": "model-00082-of-00086.safetensors", "model.layers.121.self_attn.v_proj.input_scale": "model-00082-of-00086.safetensors", + "model.layers.121.self_attn.v_proj.v_scale": "model-00082-of-00086.safetensors", "model.layers.121.self_attn.v_proj.weight": "model-00082-of-00086.safetensors", "model.layers.121.self_attn.v_proj.weight_scale": "model-00082-of-00086.safetensors", "model.layers.122.input_layernorm.weight": "model-00083-of-00086.safetensors", @@ -638,6 +692,7 @@ "model.layers.122.mlp.up_proj.weight_scale": "model-00083-of-00086.safetensors", "model.layers.122.post_attention_layernorm.weight": "model-00083-of-00086.safetensors", "model.layers.122.self_attn.k_proj.input_scale": "model-00083-of-00086.safetensors", + "model.layers.122.self_attn.k_proj.k_scale": "model-00083-of-00086.safetensors", "model.layers.122.self_attn.k_proj.weight": "model-00083-of-00086.safetensors", "model.layers.122.self_attn.k_proj.weight_scale": "model-00083-of-00086.safetensors", "model.layers.122.self_attn.o_proj.input_scale": "model-00083-of-00086.safetensors", @@ -647,6 +702,7 @@ "model.layers.122.self_attn.q_proj.weight": "model-00083-of-00086.safetensors", "model.layers.122.self_attn.q_proj.weight_scale": "model-00083-of-00086.safetensors", "model.layers.122.self_attn.v_proj.input_scale": "model-00083-of-00086.safetensors", + "model.layers.122.self_attn.v_proj.v_scale": "model-00083-of-00086.safetensors", "model.layers.122.self_attn.v_proj.weight": "model-00083-of-00086.safetensors", "model.layers.122.self_attn.v_proj.weight_scale": "model-00083-of-00086.safetensors", "model.layers.123.input_layernorm.weight": "model-00084-of-00086.safetensors", @@ -661,6 +717,7 @@ "model.layers.123.mlp.up_proj.weight_scale": "model-00084-of-00086.safetensors", "model.layers.123.post_attention_layernorm.weight": "model-00084-of-00086.safetensors", "model.layers.123.self_attn.k_proj.input_scale": "model-00083-of-00086.safetensors", + "model.layers.123.self_attn.k_proj.k_scale": "model-00083-of-00086.safetensors", "model.layers.123.self_attn.k_proj.weight": "model-00083-of-00086.safetensors", "model.layers.123.self_attn.k_proj.weight_scale": "model-00083-of-00086.safetensors", "model.layers.123.self_attn.o_proj.input_scale": "model-00083-of-00086.safetensors", @@ -670,6 +727,7 @@ "model.layers.123.self_attn.q_proj.weight": "model-00083-of-00086.safetensors", "model.layers.123.self_attn.q_proj.weight_scale": "model-00083-of-00086.safetensors", "model.layers.123.self_attn.v_proj.input_scale": "model-00083-of-00086.safetensors", + "model.layers.123.self_attn.v_proj.v_scale": "model-00083-of-00086.safetensors", "model.layers.123.self_attn.v_proj.weight": "model-00083-of-00086.safetensors", "model.layers.123.self_attn.v_proj.weight_scale": "model-00083-of-00086.safetensors", "model.layers.124.input_layernorm.weight": "model-00085-of-00086.safetensors", @@ -684,6 +742,7 @@ "model.layers.124.mlp.up_proj.weight_scale": "model-00084-of-00086.safetensors", "model.layers.124.post_attention_layernorm.weight": "model-00085-of-00086.safetensors", "model.layers.124.self_attn.k_proj.input_scale": "model-00084-of-00086.safetensors", + "model.layers.124.self_attn.k_proj.k_scale": "model-00084-of-00086.safetensors", "model.layers.124.self_attn.k_proj.weight": "model-00084-of-00086.safetensors", "model.layers.124.self_attn.k_proj.weight_scale": "model-00084-of-00086.safetensors", "model.layers.124.self_attn.o_proj.input_scale": "model-00084-of-00086.safetensors", @@ -693,6 +752,7 @@ "model.layers.124.self_attn.q_proj.weight": "model-00084-of-00086.safetensors", "model.layers.124.self_attn.q_proj.weight_scale": "model-00084-of-00086.safetensors", "model.layers.124.self_attn.v_proj.input_scale": "model-00084-of-00086.safetensors", + "model.layers.124.self_attn.v_proj.v_scale": "model-00084-of-00086.safetensors", "model.layers.124.self_attn.v_proj.weight": "model-00084-of-00086.safetensors", "model.layers.124.self_attn.v_proj.weight_scale": "model-00084-of-00086.safetensors", "model.layers.125.input_layernorm.weight": "model-00085-of-00086.safetensors", @@ -707,6 +767,7 @@ "model.layers.125.mlp.up_proj.weight_scale": "model-00085-of-00086.safetensors", "model.layers.125.post_attention_layernorm.weight": "model-00085-of-00086.safetensors", "model.layers.125.self_attn.k_proj.input_scale": "model-00085-of-00086.safetensors", + "model.layers.125.self_attn.k_proj.k_scale": "model-00085-of-00086.safetensors", "model.layers.125.self_attn.k_proj.weight": "model-00085-of-00086.safetensors", "model.layers.125.self_attn.k_proj.weight_scale": "model-00085-of-00086.safetensors", "model.layers.125.self_attn.o_proj.input_scale": "model-00085-of-00086.safetensors", @@ -716,6 +777,7 @@ "model.layers.125.self_attn.q_proj.weight": "model-00085-of-00086.safetensors", "model.layers.125.self_attn.q_proj.weight_scale": "model-00085-of-00086.safetensors", "model.layers.125.self_attn.v_proj.input_scale": "model-00085-of-00086.safetensors", + "model.layers.125.self_attn.v_proj.v_scale": "model-00085-of-00086.safetensors", "model.layers.125.self_attn.v_proj.weight": "model-00085-of-00086.safetensors", "model.layers.125.self_attn.v_proj.weight_scale": "model-00085-of-00086.safetensors", "model.layers.13.input_layernorm.weight": "model-00011-of-00086.safetensors", @@ -730,6 +792,7 @@ "model.layers.13.mlp.up_proj.weight_scale": "model-00010-of-00086.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00011-of-00086.safetensors", "model.layers.13.self_attn.k_proj.input_scale": "model-00010-of-00086.safetensors", + "model.layers.13.self_attn.k_proj.k_scale": "model-00010-of-00086.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00010-of-00086.safetensors", "model.layers.13.self_attn.k_proj.weight_scale": "model-00010-of-00086.safetensors", "model.layers.13.self_attn.o_proj.input_scale": "model-00010-of-00086.safetensors", @@ -739,6 +802,7 @@ "model.layers.13.self_attn.q_proj.weight": "model-00010-of-00086.safetensors", "model.layers.13.self_attn.q_proj.weight_scale": "model-00010-of-00086.safetensors", "model.layers.13.self_attn.v_proj.input_scale": "model-00010-of-00086.safetensors", + "model.layers.13.self_attn.v_proj.v_scale": "model-00010-of-00086.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00010-of-00086.safetensors", "model.layers.13.self_attn.v_proj.weight_scale": "model-00010-of-00086.safetensors", "model.layers.14.input_layernorm.weight": "model-00011-of-00086.safetensors", @@ -753,6 +817,7 @@ "model.layers.14.mlp.up_proj.weight_scale": "model-00011-of-00086.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00011-of-00086.safetensors", "model.layers.14.self_attn.k_proj.input_scale": "model-00011-of-00086.safetensors", + "model.layers.14.self_attn.k_proj.k_scale": "model-00011-of-00086.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00011-of-00086.safetensors", "model.layers.14.self_attn.k_proj.weight_scale": "model-00011-of-00086.safetensors", "model.layers.14.self_attn.o_proj.input_scale": "model-00011-of-00086.safetensors", @@ -762,6 +827,7 @@ "model.layers.14.self_attn.q_proj.weight": "model-00011-of-00086.safetensors", "model.layers.14.self_attn.q_proj.weight_scale": "model-00011-of-00086.safetensors", "model.layers.14.self_attn.v_proj.input_scale": "model-00011-of-00086.safetensors", + "model.layers.14.self_attn.v_proj.v_scale": "model-00011-of-00086.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00011-of-00086.safetensors", "model.layers.14.self_attn.v_proj.weight_scale": "model-00011-of-00086.safetensors", "model.layers.15.input_layernorm.weight": "model-00012-of-00086.safetensors", @@ -776,6 +842,7 @@ "model.layers.15.mlp.up_proj.weight_scale": "model-00012-of-00086.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00012-of-00086.safetensors", "model.layers.15.self_attn.k_proj.input_scale": "model-00011-of-00086.safetensors", + "model.layers.15.self_attn.k_proj.k_scale": "model-00011-of-00086.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00011-of-00086.safetensors", "model.layers.15.self_attn.k_proj.weight_scale": "model-00011-of-00086.safetensors", "model.layers.15.self_attn.o_proj.input_scale": "model-00011-of-00086.safetensors", @@ -785,6 +852,7 @@ "model.layers.15.self_attn.q_proj.weight": "model-00011-of-00086.safetensors", "model.layers.15.self_attn.q_proj.weight_scale": "model-00011-of-00086.safetensors", "model.layers.15.self_attn.v_proj.input_scale": "model-00011-of-00086.safetensors", + "model.layers.15.self_attn.v_proj.v_scale": "model-00011-of-00086.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00011-of-00086.safetensors", "model.layers.15.self_attn.v_proj.weight_scale": "model-00011-of-00086.safetensors", "model.layers.16.input_layernorm.weight": "model-00013-of-00086.safetensors", @@ -799,6 +867,7 @@ "model.layers.16.mlp.up_proj.weight_scale": "model-00012-of-00086.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00013-of-00086.safetensors", "model.layers.16.self_attn.k_proj.input_scale": "model-00012-of-00086.safetensors", + "model.layers.16.self_attn.k_proj.k_scale": "model-00012-of-00086.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00012-of-00086.safetensors", "model.layers.16.self_attn.k_proj.weight_scale": "model-00012-of-00086.safetensors", "model.layers.16.self_attn.o_proj.input_scale": "model-00012-of-00086.safetensors", @@ -808,6 +877,7 @@ "model.layers.16.self_attn.q_proj.weight": "model-00012-of-00086.safetensors", "model.layers.16.self_attn.q_proj.weight_scale": "model-00012-of-00086.safetensors", "model.layers.16.self_attn.v_proj.input_scale": "model-00012-of-00086.safetensors", + "model.layers.16.self_attn.v_proj.v_scale": "model-00012-of-00086.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00012-of-00086.safetensors", "model.layers.16.self_attn.v_proj.weight_scale": "model-00012-of-00086.safetensors", "model.layers.17.input_layernorm.weight": "model-00013-of-00086.safetensors", @@ -822,6 +892,7 @@ "model.layers.17.mlp.up_proj.weight_scale": "model-00013-of-00086.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00013-of-00086.safetensors", "model.layers.17.self_attn.k_proj.input_scale": "model-00013-of-00086.safetensors", + "model.layers.17.self_attn.k_proj.k_scale": "model-00013-of-00086.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00013-of-00086.safetensors", "model.layers.17.self_attn.k_proj.weight_scale": "model-00013-of-00086.safetensors", "model.layers.17.self_attn.o_proj.input_scale": "model-00013-of-00086.safetensors", @@ -831,6 +902,7 @@ "model.layers.17.self_attn.q_proj.weight": "model-00013-of-00086.safetensors", "model.layers.17.self_attn.q_proj.weight_scale": "model-00013-of-00086.safetensors", "model.layers.17.self_attn.v_proj.input_scale": "model-00013-of-00086.safetensors", + "model.layers.17.self_attn.v_proj.v_scale": "model-00013-of-00086.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00013-of-00086.safetensors", "model.layers.17.self_attn.v_proj.weight_scale": "model-00013-of-00086.safetensors", "model.layers.18.input_layernorm.weight": "model-00014-of-00086.safetensors", @@ -845,6 +917,7 @@ "model.layers.18.mlp.up_proj.weight_scale": "model-00014-of-00086.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00014-of-00086.safetensors", "model.layers.18.self_attn.k_proj.input_scale": "model-00013-of-00086.safetensors", + "model.layers.18.self_attn.k_proj.k_scale": "model-00013-of-00086.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00013-of-00086.safetensors", "model.layers.18.self_attn.k_proj.weight_scale": "model-00013-of-00086.safetensors", "model.layers.18.self_attn.o_proj.input_scale": "model-00013-of-00086.safetensors", @@ -854,6 +927,7 @@ "model.layers.18.self_attn.q_proj.weight": "model-00013-of-00086.safetensors", "model.layers.18.self_attn.q_proj.weight_scale": "model-00013-of-00086.safetensors", "model.layers.18.self_attn.v_proj.input_scale": "model-00013-of-00086.safetensors", + "model.layers.18.self_attn.v_proj.v_scale": "model-00013-of-00086.safetensors", "model.layers.18.self_attn.v_proj.weight": "model-00013-of-00086.safetensors", "model.layers.18.self_attn.v_proj.weight_scale": "model-00013-of-00086.safetensors", "model.layers.19.input_layernorm.weight": "model-00015-of-00086.safetensors", @@ -868,6 +942,7 @@ "model.layers.19.mlp.up_proj.weight_scale": "model-00014-of-00086.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00015-of-00086.safetensors", "model.layers.19.self_attn.k_proj.input_scale": "model-00014-of-00086.safetensors", + "model.layers.19.self_attn.k_proj.k_scale": "model-00014-of-00086.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00014-of-00086.safetensors", "model.layers.19.self_attn.k_proj.weight_scale": "model-00014-of-00086.safetensors", "model.layers.19.self_attn.o_proj.input_scale": "model-00014-of-00086.safetensors", @@ -877,6 +952,7 @@ "model.layers.19.self_attn.q_proj.weight": "model-00014-of-00086.safetensors", "model.layers.19.self_attn.q_proj.weight_scale": "model-00014-of-00086.safetensors", "model.layers.19.self_attn.v_proj.input_scale": "model-00014-of-00086.safetensors", + "model.layers.19.self_attn.v_proj.v_scale": "model-00014-of-00086.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00014-of-00086.safetensors", "model.layers.19.self_attn.v_proj.weight_scale": "model-00014-of-00086.safetensors", "model.layers.2.input_layernorm.weight": "model-00003-of-00086.safetensors", @@ -891,6 +967,7 @@ "model.layers.2.mlp.up_proj.weight_scale": "model-00003-of-00086.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00086.safetensors", "model.layers.2.self_attn.k_proj.input_scale": "model-00003-of-00086.safetensors", + "model.layers.2.self_attn.k_proj.k_scale": "model-00003-of-00086.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00086.safetensors", "model.layers.2.self_attn.k_proj.weight_scale": "model-00003-of-00086.safetensors", "model.layers.2.self_attn.o_proj.input_scale": "model-00003-of-00086.safetensors", @@ -900,6 +977,7 @@ "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00086.safetensors", "model.layers.2.self_attn.q_proj.weight_scale": "model-00003-of-00086.safetensors", "model.layers.2.self_attn.v_proj.input_scale": "model-00003-of-00086.safetensors", + "model.layers.2.self_attn.v_proj.v_scale": "model-00003-of-00086.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00086.safetensors", "model.layers.2.self_attn.v_proj.weight_scale": "model-00003-of-00086.safetensors", "model.layers.20.input_layernorm.weight": "model-00015-of-00086.safetensors", @@ -914,6 +992,7 @@ "model.layers.20.mlp.up_proj.weight_scale": "model-00015-of-00086.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00015-of-00086.safetensors", "model.layers.20.self_attn.k_proj.input_scale": "model-00015-of-00086.safetensors", + "model.layers.20.self_attn.k_proj.k_scale": "model-00015-of-00086.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00015-of-00086.safetensors", "model.layers.20.self_attn.k_proj.weight_scale": "model-00015-of-00086.safetensors", "model.layers.20.self_attn.o_proj.input_scale": "model-00015-of-00086.safetensors", @@ -923,6 +1002,7 @@ "model.layers.20.self_attn.q_proj.weight": "model-00015-of-00086.safetensors", "model.layers.20.self_attn.q_proj.weight_scale": "model-00015-of-00086.safetensors", "model.layers.20.self_attn.v_proj.input_scale": "model-00015-of-00086.safetensors", + "model.layers.20.self_attn.v_proj.v_scale": "model-00015-of-00086.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00015-of-00086.safetensors", "model.layers.20.self_attn.v_proj.weight_scale": "model-00015-of-00086.safetensors", "model.layers.21.input_layernorm.weight": "model-00016-of-00086.safetensors", @@ -937,6 +1017,7 @@ "model.layers.21.mlp.up_proj.weight_scale": "model-00016-of-00086.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00016-of-00086.safetensors", "model.layers.21.self_attn.k_proj.input_scale": "model-00015-of-00086.safetensors", + "model.layers.21.self_attn.k_proj.k_scale": "model-00015-of-00086.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00015-of-00086.safetensors", "model.layers.21.self_attn.k_proj.weight_scale": "model-00015-of-00086.safetensors", "model.layers.21.self_attn.o_proj.input_scale": "model-00015-of-00086.safetensors", @@ -946,6 +1027,7 @@ "model.layers.21.self_attn.q_proj.weight": "model-00015-of-00086.safetensors", "model.layers.21.self_attn.q_proj.weight_scale": "model-00015-of-00086.safetensors", "model.layers.21.self_attn.v_proj.input_scale": "model-00015-of-00086.safetensors", + "model.layers.21.self_attn.v_proj.v_scale": "model-00015-of-00086.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00015-of-00086.safetensors", "model.layers.21.self_attn.v_proj.weight_scale": "model-00015-of-00086.safetensors", "model.layers.22.input_layernorm.weight": "model-00017-of-00086.safetensors", @@ -960,6 +1042,7 @@ "model.layers.22.mlp.up_proj.weight_scale": "model-00016-of-00086.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00017-of-00086.safetensors", "model.layers.22.self_attn.k_proj.input_scale": "model-00016-of-00086.safetensors", + "model.layers.22.self_attn.k_proj.k_scale": "model-00016-of-00086.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00016-of-00086.safetensors", "model.layers.22.self_attn.k_proj.weight_scale": "model-00016-of-00086.safetensors", "model.layers.22.self_attn.o_proj.input_scale": "model-00016-of-00086.safetensors", @@ -969,6 +1052,7 @@ "model.layers.22.self_attn.q_proj.weight": "model-00016-of-00086.safetensors", "model.layers.22.self_attn.q_proj.weight_scale": "model-00016-of-00086.safetensors", "model.layers.22.self_attn.v_proj.input_scale": "model-00016-of-00086.safetensors", + "model.layers.22.self_attn.v_proj.v_scale": "model-00016-of-00086.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00016-of-00086.safetensors", "model.layers.22.self_attn.v_proj.weight_scale": "model-00016-of-00086.safetensors", "model.layers.23.input_layernorm.weight": "model-00017-of-00086.safetensors", @@ -983,6 +1067,7 @@ "model.layers.23.mlp.up_proj.weight_scale": "model-00017-of-00086.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00017-of-00086.safetensors", "model.layers.23.self_attn.k_proj.input_scale": "model-00017-of-00086.safetensors", + "model.layers.23.self_attn.k_proj.k_scale": "model-00017-of-00086.safetensors", "model.layers.23.self_attn.k_proj.weight": "model-00017-of-00086.safetensors", "model.layers.23.self_attn.k_proj.weight_scale": "model-00017-of-00086.safetensors", "model.layers.23.self_attn.o_proj.input_scale": "model-00017-of-00086.safetensors", @@ -992,6 +1077,7 @@ "model.layers.23.self_attn.q_proj.weight": "model-00017-of-00086.safetensors", "model.layers.23.self_attn.q_proj.weight_scale": "model-00017-of-00086.safetensors", "model.layers.23.self_attn.v_proj.input_scale": "model-00017-of-00086.safetensors", + "model.layers.23.self_attn.v_proj.v_scale": "model-00017-of-00086.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00017-of-00086.safetensors", "model.layers.23.self_attn.v_proj.weight_scale": "model-00017-of-00086.safetensors", "model.layers.24.input_layernorm.weight": "model-00018-of-00086.safetensors", @@ -1006,6 +1092,7 @@ "model.layers.24.mlp.up_proj.weight_scale": "model-00018-of-00086.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00018-of-00086.safetensors", "model.layers.24.self_attn.k_proj.input_scale": "model-00017-of-00086.safetensors", + "model.layers.24.self_attn.k_proj.k_scale": "model-00017-of-00086.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00017-of-00086.safetensors", "model.layers.24.self_attn.k_proj.weight_scale": "model-00017-of-00086.safetensors", "model.layers.24.self_attn.o_proj.input_scale": "model-00017-of-00086.safetensors", @@ -1015,6 +1102,7 @@ "model.layers.24.self_attn.q_proj.weight": "model-00017-of-00086.safetensors", "model.layers.24.self_attn.q_proj.weight_scale": "model-00017-of-00086.safetensors", "model.layers.24.self_attn.v_proj.input_scale": "model-00017-of-00086.safetensors", + "model.layers.24.self_attn.v_proj.v_scale": "model-00017-of-00086.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00017-of-00086.safetensors", "model.layers.24.self_attn.v_proj.weight_scale": "model-00017-of-00086.safetensors", "model.layers.25.input_layernorm.weight": "model-00019-of-00086.safetensors", @@ -1029,6 +1117,7 @@ "model.layers.25.mlp.up_proj.weight_scale": "model-00018-of-00086.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00019-of-00086.safetensors", "model.layers.25.self_attn.k_proj.input_scale": "model-00018-of-00086.safetensors", + "model.layers.25.self_attn.k_proj.k_scale": "model-00018-of-00086.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00018-of-00086.safetensors", "model.layers.25.self_attn.k_proj.weight_scale": "model-00018-of-00086.safetensors", "model.layers.25.self_attn.o_proj.input_scale": "model-00018-of-00086.safetensors", @@ -1038,6 +1127,7 @@ "model.layers.25.self_attn.q_proj.weight": "model-00018-of-00086.safetensors", "model.layers.25.self_attn.q_proj.weight_scale": "model-00018-of-00086.safetensors", "model.layers.25.self_attn.v_proj.input_scale": "model-00018-of-00086.safetensors", + "model.layers.25.self_attn.v_proj.v_scale": "model-00018-of-00086.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00018-of-00086.safetensors", "model.layers.25.self_attn.v_proj.weight_scale": "model-00018-of-00086.safetensors", "model.layers.26.input_layernorm.weight": "model-00019-of-00086.safetensors", @@ -1052,6 +1142,7 @@ "model.layers.26.mlp.up_proj.weight_scale": "model-00019-of-00086.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00019-of-00086.safetensors", "model.layers.26.self_attn.k_proj.input_scale": "model-00019-of-00086.safetensors", + "model.layers.26.self_attn.k_proj.k_scale": "model-00019-of-00086.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00019-of-00086.safetensors", "model.layers.26.self_attn.k_proj.weight_scale": "model-00019-of-00086.safetensors", "model.layers.26.self_attn.o_proj.input_scale": "model-00019-of-00086.safetensors", @@ -1061,6 +1152,7 @@ "model.layers.26.self_attn.q_proj.weight": "model-00019-of-00086.safetensors", "model.layers.26.self_attn.q_proj.weight_scale": "model-00019-of-00086.safetensors", "model.layers.26.self_attn.v_proj.input_scale": "model-00019-of-00086.safetensors", + "model.layers.26.self_attn.v_proj.v_scale": "model-00019-of-00086.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00019-of-00086.safetensors", "model.layers.26.self_attn.v_proj.weight_scale": "model-00019-of-00086.safetensors", "model.layers.27.input_layernorm.weight": "model-00020-of-00086.safetensors", @@ -1075,6 +1167,7 @@ "model.layers.27.mlp.up_proj.weight_scale": "model-00020-of-00086.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00020-of-00086.safetensors", "model.layers.27.self_attn.k_proj.input_scale": "model-00019-of-00086.safetensors", + "model.layers.27.self_attn.k_proj.k_scale": "model-00019-of-00086.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00019-of-00086.safetensors", "model.layers.27.self_attn.k_proj.weight_scale": "model-00019-of-00086.safetensors", "model.layers.27.self_attn.o_proj.input_scale": "model-00019-of-00086.safetensors", @@ -1084,6 +1177,7 @@ "model.layers.27.self_attn.q_proj.weight": "model-00019-of-00086.safetensors", "model.layers.27.self_attn.q_proj.weight_scale": "model-00019-of-00086.safetensors", "model.layers.27.self_attn.v_proj.input_scale": "model-00019-of-00086.safetensors", + "model.layers.27.self_attn.v_proj.v_scale": "model-00019-of-00086.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00019-of-00086.safetensors", "model.layers.27.self_attn.v_proj.weight_scale": "model-00019-of-00086.safetensors", "model.layers.28.input_layernorm.weight": "model-00021-of-00086.safetensors", @@ -1098,6 +1192,7 @@ "model.layers.28.mlp.up_proj.weight_scale": "model-00020-of-00086.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00021-of-00086.safetensors", "model.layers.28.self_attn.k_proj.input_scale": "model-00020-of-00086.safetensors", + "model.layers.28.self_attn.k_proj.k_scale": "model-00020-of-00086.safetensors", "model.layers.28.self_attn.k_proj.weight": "model-00020-of-00086.safetensors", "model.layers.28.self_attn.k_proj.weight_scale": "model-00020-of-00086.safetensors", "model.layers.28.self_attn.o_proj.input_scale": "model-00020-of-00086.safetensors", @@ -1107,6 +1202,7 @@ "model.layers.28.self_attn.q_proj.weight": "model-00020-of-00086.safetensors", "model.layers.28.self_attn.q_proj.weight_scale": "model-00020-of-00086.safetensors", "model.layers.28.self_attn.v_proj.input_scale": "model-00020-of-00086.safetensors", + "model.layers.28.self_attn.v_proj.v_scale": "model-00020-of-00086.safetensors", "model.layers.28.self_attn.v_proj.weight": "model-00020-of-00086.safetensors", "model.layers.28.self_attn.v_proj.weight_scale": "model-00020-of-00086.safetensors", "model.layers.29.input_layernorm.weight": "model-00021-of-00086.safetensors", @@ -1121,6 +1217,7 @@ "model.layers.29.mlp.up_proj.weight_scale": "model-00021-of-00086.safetensors", "model.layers.29.post_attention_layernorm.weight": "model-00021-of-00086.safetensors", "model.layers.29.self_attn.k_proj.input_scale": "model-00021-of-00086.safetensors", + "model.layers.29.self_attn.k_proj.k_scale": "model-00021-of-00086.safetensors", "model.layers.29.self_attn.k_proj.weight": "model-00021-of-00086.safetensors", "model.layers.29.self_attn.k_proj.weight_scale": "model-00021-of-00086.safetensors", "model.layers.29.self_attn.o_proj.input_scale": "model-00021-of-00086.safetensors", @@ -1130,6 +1227,7 @@ "model.layers.29.self_attn.q_proj.weight": "model-00021-of-00086.safetensors", "model.layers.29.self_attn.q_proj.weight_scale": "model-00021-of-00086.safetensors", "model.layers.29.self_attn.v_proj.input_scale": "model-00021-of-00086.safetensors", + "model.layers.29.self_attn.v_proj.v_scale": "model-00021-of-00086.safetensors", "model.layers.29.self_attn.v_proj.weight": "model-00021-of-00086.safetensors", "model.layers.29.self_attn.v_proj.weight_scale": "model-00021-of-00086.safetensors", "model.layers.3.input_layernorm.weight": "model-00004-of-00086.safetensors", @@ -1144,6 +1242,7 @@ "model.layers.3.mlp.up_proj.weight_scale": "model-00004-of-00086.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00086.safetensors", "model.layers.3.self_attn.k_proj.input_scale": "model-00003-of-00086.safetensors", + "model.layers.3.self_attn.k_proj.k_scale": "model-00003-of-00086.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00003-of-00086.safetensors", "model.layers.3.self_attn.k_proj.weight_scale": "model-00003-of-00086.safetensors", "model.layers.3.self_attn.o_proj.input_scale": "model-00003-of-00086.safetensors", @@ -1153,6 +1252,7 @@ "model.layers.3.self_attn.q_proj.weight": "model-00003-of-00086.safetensors", "model.layers.3.self_attn.q_proj.weight_scale": "model-00003-of-00086.safetensors", "model.layers.3.self_attn.v_proj.input_scale": "model-00003-of-00086.safetensors", + "model.layers.3.self_attn.v_proj.v_scale": "model-00003-of-00086.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00003-of-00086.safetensors", "model.layers.3.self_attn.v_proj.weight_scale": "model-00003-of-00086.safetensors", "model.layers.30.input_layernorm.weight": "model-00022-of-00086.safetensors", @@ -1167,6 +1267,7 @@ "model.layers.30.mlp.up_proj.weight_scale": "model-00022-of-00086.safetensors", "model.layers.30.post_attention_layernorm.weight": "model-00022-of-00086.safetensors", "model.layers.30.self_attn.k_proj.input_scale": "model-00021-of-00086.safetensors", + "model.layers.30.self_attn.k_proj.k_scale": "model-00021-of-00086.safetensors", "model.layers.30.self_attn.k_proj.weight": "model-00021-of-00086.safetensors", "model.layers.30.self_attn.k_proj.weight_scale": "model-00021-of-00086.safetensors", "model.layers.30.self_attn.o_proj.input_scale": "model-00021-of-00086.safetensors", @@ -1176,6 +1277,7 @@ "model.layers.30.self_attn.q_proj.weight": "model-00021-of-00086.safetensors", "model.layers.30.self_attn.q_proj.weight_scale": "model-00021-of-00086.safetensors", "model.layers.30.self_attn.v_proj.input_scale": "model-00021-of-00086.safetensors", + "model.layers.30.self_attn.v_proj.v_scale": "model-00021-of-00086.safetensors", "model.layers.30.self_attn.v_proj.weight": "model-00021-of-00086.safetensors", "model.layers.30.self_attn.v_proj.weight_scale": "model-00021-of-00086.safetensors", "model.layers.31.input_layernorm.weight": "model-00023-of-00086.safetensors", @@ -1190,6 +1292,7 @@ "model.layers.31.mlp.up_proj.weight_scale": "model-00022-of-00086.safetensors", "model.layers.31.post_attention_layernorm.weight": "model-00023-of-00086.safetensors", "model.layers.31.self_attn.k_proj.input_scale": "model-00022-of-00086.safetensors", + "model.layers.31.self_attn.k_proj.k_scale": "model-00022-of-00086.safetensors", "model.layers.31.self_attn.k_proj.weight": "model-00022-of-00086.safetensors", "model.layers.31.self_attn.k_proj.weight_scale": "model-00022-of-00086.safetensors", "model.layers.31.self_attn.o_proj.input_scale": "model-00022-of-00086.safetensors", @@ -1199,6 +1302,7 @@ "model.layers.31.self_attn.q_proj.weight": "model-00022-of-00086.safetensors", "model.layers.31.self_attn.q_proj.weight_scale": "model-00022-of-00086.safetensors", "model.layers.31.self_attn.v_proj.input_scale": "model-00022-of-00086.safetensors", + "model.layers.31.self_attn.v_proj.v_scale": "model-00022-of-00086.safetensors", "model.layers.31.self_attn.v_proj.weight": "model-00022-of-00086.safetensors", "model.layers.31.self_attn.v_proj.weight_scale": "model-00022-of-00086.safetensors", "model.layers.32.input_layernorm.weight": "model-00023-of-00086.safetensors", @@ -1213,6 +1317,7 @@ "model.layers.32.mlp.up_proj.weight_scale": "model-00023-of-00086.safetensors", "model.layers.32.post_attention_layernorm.weight": "model-00023-of-00086.safetensors", "model.layers.32.self_attn.k_proj.input_scale": "model-00023-of-00086.safetensors", + "model.layers.32.self_attn.k_proj.k_scale": "model-00023-of-00086.safetensors", "model.layers.32.self_attn.k_proj.weight": "model-00023-of-00086.safetensors", "model.layers.32.self_attn.k_proj.weight_scale": "model-00023-of-00086.safetensors", "model.layers.32.self_attn.o_proj.input_scale": "model-00023-of-00086.safetensors", @@ -1222,6 +1327,7 @@ "model.layers.32.self_attn.q_proj.weight": "model-00023-of-00086.safetensors", "model.layers.32.self_attn.q_proj.weight_scale": "model-00023-of-00086.safetensors", "model.layers.32.self_attn.v_proj.input_scale": "model-00023-of-00086.safetensors", + "model.layers.32.self_attn.v_proj.v_scale": "model-00023-of-00086.safetensors", "model.layers.32.self_attn.v_proj.weight": "model-00023-of-00086.safetensors", "model.layers.32.self_attn.v_proj.weight_scale": "model-00023-of-00086.safetensors", "model.layers.33.input_layernorm.weight": "model-00024-of-00086.safetensors", @@ -1236,6 +1342,7 @@ "model.layers.33.mlp.up_proj.weight_scale": "model-00024-of-00086.safetensors", "model.layers.33.post_attention_layernorm.weight": "model-00024-of-00086.safetensors", "model.layers.33.self_attn.k_proj.input_scale": "model-00023-of-00086.safetensors", + "model.layers.33.self_attn.k_proj.k_scale": "model-00023-of-00086.safetensors", "model.layers.33.self_attn.k_proj.weight": "model-00023-of-00086.safetensors", "model.layers.33.self_attn.k_proj.weight_scale": "model-00023-of-00086.safetensors", "model.layers.33.self_attn.o_proj.input_scale": "model-00023-of-00086.safetensors", @@ -1245,6 +1352,7 @@ "model.layers.33.self_attn.q_proj.weight": "model-00023-of-00086.safetensors", "model.layers.33.self_attn.q_proj.weight_scale": "model-00023-of-00086.safetensors", "model.layers.33.self_attn.v_proj.input_scale": "model-00023-of-00086.safetensors", + "model.layers.33.self_attn.v_proj.v_scale": "model-00023-of-00086.safetensors", "model.layers.33.self_attn.v_proj.weight": "model-00023-of-00086.safetensors", "model.layers.33.self_attn.v_proj.weight_scale": "model-00023-of-00086.safetensors", "model.layers.34.input_layernorm.weight": "model-00025-of-00086.safetensors", @@ -1259,6 +1367,7 @@ "model.layers.34.mlp.up_proj.weight_scale": "model-00024-of-00086.safetensors", "model.layers.34.post_attention_layernorm.weight": "model-00025-of-00086.safetensors", "model.layers.34.self_attn.k_proj.input_scale": "model-00024-of-00086.safetensors", + "model.layers.34.self_attn.k_proj.k_scale": "model-00024-of-00086.safetensors", "model.layers.34.self_attn.k_proj.weight": "model-00024-of-00086.safetensors", "model.layers.34.self_attn.k_proj.weight_scale": "model-00024-of-00086.safetensors", "model.layers.34.self_attn.o_proj.input_scale": "model-00024-of-00086.safetensors", @@ -1268,6 +1377,7 @@ "model.layers.34.self_attn.q_proj.weight": "model-00024-of-00086.safetensors", "model.layers.34.self_attn.q_proj.weight_scale": "model-00024-of-00086.safetensors", "model.layers.34.self_attn.v_proj.input_scale": "model-00024-of-00086.safetensors", + "model.layers.34.self_attn.v_proj.v_scale": "model-00024-of-00086.safetensors", "model.layers.34.self_attn.v_proj.weight": "model-00024-of-00086.safetensors", "model.layers.34.self_attn.v_proj.weight_scale": "model-00024-of-00086.safetensors", "model.layers.35.input_layernorm.weight": "model-00025-of-00086.safetensors", @@ -1282,6 +1392,7 @@ "model.layers.35.mlp.up_proj.weight_scale": "model-00025-of-00086.safetensors", "model.layers.35.post_attention_layernorm.weight": "model-00025-of-00086.safetensors", "model.layers.35.self_attn.k_proj.input_scale": "model-00025-of-00086.safetensors", + "model.layers.35.self_attn.k_proj.k_scale": "model-00025-of-00086.safetensors", "model.layers.35.self_attn.k_proj.weight": "model-00025-of-00086.safetensors", "model.layers.35.self_attn.k_proj.weight_scale": "model-00025-of-00086.safetensors", "model.layers.35.self_attn.o_proj.input_scale": "model-00025-of-00086.safetensors", @@ -1291,6 +1402,7 @@ "model.layers.35.self_attn.q_proj.weight": "model-00025-of-00086.safetensors", "model.layers.35.self_attn.q_proj.weight_scale": "model-00025-of-00086.safetensors", "model.layers.35.self_attn.v_proj.input_scale": "model-00025-of-00086.safetensors", + "model.layers.35.self_attn.v_proj.v_scale": "model-00025-of-00086.safetensors", "model.layers.35.self_attn.v_proj.weight": "model-00025-of-00086.safetensors", "model.layers.35.self_attn.v_proj.weight_scale": "model-00025-of-00086.safetensors", "model.layers.36.input_layernorm.weight": "model-00026-of-00086.safetensors", @@ -1305,6 +1417,7 @@ "model.layers.36.mlp.up_proj.weight_scale": "model-00026-of-00086.safetensors", "model.layers.36.post_attention_layernorm.weight": "model-00026-of-00086.safetensors", "model.layers.36.self_attn.k_proj.input_scale": "model-00025-of-00086.safetensors", + "model.layers.36.self_attn.k_proj.k_scale": "model-00025-of-00086.safetensors", "model.layers.36.self_attn.k_proj.weight": "model-00025-of-00086.safetensors", "model.layers.36.self_attn.k_proj.weight_scale": "model-00025-of-00086.safetensors", "model.layers.36.self_attn.o_proj.input_scale": "model-00025-of-00086.safetensors", @@ -1314,6 +1427,7 @@ "model.layers.36.self_attn.q_proj.weight": "model-00025-of-00086.safetensors", "model.layers.36.self_attn.q_proj.weight_scale": "model-00025-of-00086.safetensors", "model.layers.36.self_attn.v_proj.input_scale": "model-00025-of-00086.safetensors", + "model.layers.36.self_attn.v_proj.v_scale": "model-00025-of-00086.safetensors", "model.layers.36.self_attn.v_proj.weight": "model-00025-of-00086.safetensors", "model.layers.36.self_attn.v_proj.weight_scale": "model-00025-of-00086.safetensors", "model.layers.37.input_layernorm.weight": "model-00027-of-00086.safetensors", @@ -1328,6 +1442,7 @@ "model.layers.37.mlp.up_proj.weight_scale": "model-00026-of-00086.safetensors", "model.layers.37.post_attention_layernorm.weight": "model-00027-of-00086.safetensors", "model.layers.37.self_attn.k_proj.input_scale": "model-00026-of-00086.safetensors", + "model.layers.37.self_attn.k_proj.k_scale": "model-00026-of-00086.safetensors", "model.layers.37.self_attn.k_proj.weight": "model-00026-of-00086.safetensors", "model.layers.37.self_attn.k_proj.weight_scale": "model-00026-of-00086.safetensors", "model.layers.37.self_attn.o_proj.input_scale": "model-00026-of-00086.safetensors", @@ -1337,6 +1452,7 @@ "model.layers.37.self_attn.q_proj.weight": "model-00026-of-00086.safetensors", "model.layers.37.self_attn.q_proj.weight_scale": "model-00026-of-00086.safetensors", "model.layers.37.self_attn.v_proj.input_scale": "model-00026-of-00086.safetensors", + "model.layers.37.self_attn.v_proj.v_scale": "model-00026-of-00086.safetensors", "model.layers.37.self_attn.v_proj.weight": "model-00026-of-00086.safetensors", "model.layers.37.self_attn.v_proj.weight_scale": "model-00026-of-00086.safetensors", "model.layers.38.input_layernorm.weight": "model-00027-of-00086.safetensors", @@ -1351,6 +1467,7 @@ "model.layers.38.mlp.up_proj.weight_scale": "model-00027-of-00086.safetensors", "model.layers.38.post_attention_layernorm.weight": "model-00027-of-00086.safetensors", "model.layers.38.self_attn.k_proj.input_scale": "model-00027-of-00086.safetensors", + "model.layers.38.self_attn.k_proj.k_scale": "model-00027-of-00086.safetensors", "model.layers.38.self_attn.k_proj.weight": "model-00027-of-00086.safetensors", "model.layers.38.self_attn.k_proj.weight_scale": "model-00027-of-00086.safetensors", "model.layers.38.self_attn.o_proj.input_scale": "model-00027-of-00086.safetensors", @@ -1360,6 +1477,7 @@ "model.layers.38.self_attn.q_proj.weight": "model-00027-of-00086.safetensors", "model.layers.38.self_attn.q_proj.weight_scale": "model-00027-of-00086.safetensors", "model.layers.38.self_attn.v_proj.input_scale": "model-00027-of-00086.safetensors", + "model.layers.38.self_attn.v_proj.v_scale": "model-00027-of-00086.safetensors", "model.layers.38.self_attn.v_proj.weight": "model-00027-of-00086.safetensors", "model.layers.38.self_attn.v_proj.weight_scale": "model-00027-of-00086.safetensors", "model.layers.39.input_layernorm.weight": "model-00028-of-00086.safetensors", @@ -1374,6 +1492,7 @@ "model.layers.39.mlp.up_proj.weight_scale": "model-00028-of-00086.safetensors", "model.layers.39.post_attention_layernorm.weight": "model-00028-of-00086.safetensors", "model.layers.39.self_attn.k_proj.input_scale": "model-00027-of-00086.safetensors", + "model.layers.39.self_attn.k_proj.k_scale": "model-00027-of-00086.safetensors", "model.layers.39.self_attn.k_proj.weight": "model-00027-of-00086.safetensors", "model.layers.39.self_attn.k_proj.weight_scale": "model-00027-of-00086.safetensors", "model.layers.39.self_attn.o_proj.input_scale": "model-00027-of-00086.safetensors", @@ -1383,6 +1502,7 @@ "model.layers.39.self_attn.q_proj.weight": "model-00027-of-00086.safetensors", "model.layers.39.self_attn.q_proj.weight_scale": "model-00027-of-00086.safetensors", "model.layers.39.self_attn.v_proj.input_scale": "model-00027-of-00086.safetensors", + "model.layers.39.self_attn.v_proj.v_scale": "model-00027-of-00086.safetensors", "model.layers.39.self_attn.v_proj.weight": "model-00027-of-00086.safetensors", "model.layers.39.self_attn.v_proj.weight_scale": "model-00027-of-00086.safetensors", "model.layers.4.input_layernorm.weight": "model-00005-of-00086.safetensors", @@ -1397,6 +1517,7 @@ "model.layers.4.mlp.up_proj.weight_scale": "model-00004-of-00086.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00086.safetensors", "model.layers.4.self_attn.k_proj.input_scale": "model-00004-of-00086.safetensors", + "model.layers.4.self_attn.k_proj.k_scale": "model-00004-of-00086.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00004-of-00086.safetensors", "model.layers.4.self_attn.k_proj.weight_scale": "model-00004-of-00086.safetensors", "model.layers.4.self_attn.o_proj.input_scale": "model-00004-of-00086.safetensors", @@ -1406,6 +1527,7 @@ "model.layers.4.self_attn.q_proj.weight": "model-00004-of-00086.safetensors", "model.layers.4.self_attn.q_proj.weight_scale": "model-00004-of-00086.safetensors", "model.layers.4.self_attn.v_proj.input_scale": "model-00004-of-00086.safetensors", + "model.layers.4.self_attn.v_proj.v_scale": "model-00004-of-00086.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00004-of-00086.safetensors", "model.layers.4.self_attn.v_proj.weight_scale": "model-00004-of-00086.safetensors", "model.layers.40.input_layernorm.weight": "model-00029-of-00086.safetensors", @@ -1420,6 +1542,7 @@ "model.layers.40.mlp.up_proj.weight_scale": "model-00028-of-00086.safetensors", "model.layers.40.post_attention_layernorm.weight": "model-00029-of-00086.safetensors", "model.layers.40.self_attn.k_proj.input_scale": "model-00028-of-00086.safetensors", + "model.layers.40.self_attn.k_proj.k_scale": "model-00028-of-00086.safetensors", "model.layers.40.self_attn.k_proj.weight": "model-00028-of-00086.safetensors", "model.layers.40.self_attn.k_proj.weight_scale": "model-00028-of-00086.safetensors", "model.layers.40.self_attn.o_proj.input_scale": "model-00028-of-00086.safetensors", @@ -1429,6 +1552,7 @@ "model.layers.40.self_attn.q_proj.weight": "model-00028-of-00086.safetensors", "model.layers.40.self_attn.q_proj.weight_scale": "model-00028-of-00086.safetensors", "model.layers.40.self_attn.v_proj.input_scale": "model-00028-of-00086.safetensors", + "model.layers.40.self_attn.v_proj.v_scale": "model-00028-of-00086.safetensors", "model.layers.40.self_attn.v_proj.weight": "model-00028-of-00086.safetensors", "model.layers.40.self_attn.v_proj.weight_scale": "model-00028-of-00086.safetensors", "model.layers.41.input_layernorm.weight": "model-00029-of-00086.safetensors", @@ -1443,6 +1567,7 @@ "model.layers.41.mlp.up_proj.weight_scale": "model-00029-of-00086.safetensors", "model.layers.41.post_attention_layernorm.weight": "model-00029-of-00086.safetensors", "model.layers.41.self_attn.k_proj.input_scale": "model-00029-of-00086.safetensors", + "model.layers.41.self_attn.k_proj.k_scale": "model-00029-of-00086.safetensors", "model.layers.41.self_attn.k_proj.weight": "model-00029-of-00086.safetensors", "model.layers.41.self_attn.k_proj.weight_scale": "model-00029-of-00086.safetensors", "model.layers.41.self_attn.o_proj.input_scale": "model-00029-of-00086.safetensors", @@ -1452,6 +1577,7 @@ "model.layers.41.self_attn.q_proj.weight": "model-00029-of-00086.safetensors", "model.layers.41.self_attn.q_proj.weight_scale": "model-00029-of-00086.safetensors", "model.layers.41.self_attn.v_proj.input_scale": "model-00029-of-00086.safetensors", + "model.layers.41.self_attn.v_proj.v_scale": "model-00029-of-00086.safetensors", "model.layers.41.self_attn.v_proj.weight": "model-00029-of-00086.safetensors", "model.layers.41.self_attn.v_proj.weight_scale": "model-00029-of-00086.safetensors", "model.layers.42.input_layernorm.weight": "model-00030-of-00086.safetensors", @@ -1466,6 +1592,7 @@ "model.layers.42.mlp.up_proj.weight_scale": "model-00030-of-00086.safetensors", "model.layers.42.post_attention_layernorm.weight": "model-00030-of-00086.safetensors", "model.layers.42.self_attn.k_proj.input_scale": "model-00029-of-00086.safetensors", + "model.layers.42.self_attn.k_proj.k_scale": "model-00029-of-00086.safetensors", "model.layers.42.self_attn.k_proj.weight": "model-00029-of-00086.safetensors", "model.layers.42.self_attn.k_proj.weight_scale": "model-00029-of-00086.safetensors", "model.layers.42.self_attn.o_proj.input_scale": "model-00029-of-00086.safetensors", @@ -1475,6 +1602,7 @@ "model.layers.42.self_attn.q_proj.weight": "model-00029-of-00086.safetensors", "model.layers.42.self_attn.q_proj.weight_scale": "model-00029-of-00086.safetensors", "model.layers.42.self_attn.v_proj.input_scale": "model-00029-of-00086.safetensors", + "model.layers.42.self_attn.v_proj.v_scale": "model-00029-of-00086.safetensors", "model.layers.42.self_attn.v_proj.weight": "model-00029-of-00086.safetensors", "model.layers.42.self_attn.v_proj.weight_scale": "model-00029-of-00086.safetensors", "model.layers.43.input_layernorm.weight": "model-00031-of-00086.safetensors", @@ -1489,6 +1617,7 @@ "model.layers.43.mlp.up_proj.weight_scale": "model-00030-of-00086.safetensors", "model.layers.43.post_attention_layernorm.weight": "model-00031-of-00086.safetensors", "model.layers.43.self_attn.k_proj.input_scale": "model-00030-of-00086.safetensors", + "model.layers.43.self_attn.k_proj.k_scale": "model-00030-of-00086.safetensors", "model.layers.43.self_attn.k_proj.weight": "model-00030-of-00086.safetensors", "model.layers.43.self_attn.k_proj.weight_scale": "model-00030-of-00086.safetensors", "model.layers.43.self_attn.o_proj.input_scale": "model-00030-of-00086.safetensors", @@ -1498,6 +1627,7 @@ "model.layers.43.self_attn.q_proj.weight": "model-00030-of-00086.safetensors", "model.layers.43.self_attn.q_proj.weight_scale": "model-00030-of-00086.safetensors", "model.layers.43.self_attn.v_proj.input_scale": "model-00030-of-00086.safetensors", + "model.layers.43.self_attn.v_proj.v_scale": "model-00030-of-00086.safetensors", "model.layers.43.self_attn.v_proj.weight": "model-00030-of-00086.safetensors", "model.layers.43.self_attn.v_proj.weight_scale": "model-00030-of-00086.safetensors", "model.layers.44.input_layernorm.weight": "model-00031-of-00086.safetensors", @@ -1512,6 +1642,7 @@ "model.layers.44.mlp.up_proj.weight_scale": "model-00031-of-00086.safetensors", "model.layers.44.post_attention_layernorm.weight": "model-00031-of-00086.safetensors", "model.layers.44.self_attn.k_proj.input_scale": "model-00031-of-00086.safetensors", + "model.layers.44.self_attn.k_proj.k_scale": "model-00031-of-00086.safetensors", "model.layers.44.self_attn.k_proj.weight": "model-00031-of-00086.safetensors", "model.layers.44.self_attn.k_proj.weight_scale": "model-00031-of-00086.safetensors", "model.layers.44.self_attn.o_proj.input_scale": "model-00031-of-00086.safetensors", @@ -1521,6 +1652,7 @@ "model.layers.44.self_attn.q_proj.weight": "model-00031-of-00086.safetensors", "model.layers.44.self_attn.q_proj.weight_scale": "model-00031-of-00086.safetensors", "model.layers.44.self_attn.v_proj.input_scale": "model-00031-of-00086.safetensors", + "model.layers.44.self_attn.v_proj.v_scale": "model-00031-of-00086.safetensors", "model.layers.44.self_attn.v_proj.weight": "model-00031-of-00086.safetensors", "model.layers.44.self_attn.v_proj.weight_scale": "model-00031-of-00086.safetensors", "model.layers.45.input_layernorm.weight": "model-00032-of-00086.safetensors", @@ -1535,6 +1667,7 @@ "model.layers.45.mlp.up_proj.weight_scale": "model-00032-of-00086.safetensors", "model.layers.45.post_attention_layernorm.weight": "model-00032-of-00086.safetensors", "model.layers.45.self_attn.k_proj.input_scale": "model-00031-of-00086.safetensors", + "model.layers.45.self_attn.k_proj.k_scale": "model-00031-of-00086.safetensors", "model.layers.45.self_attn.k_proj.weight": "model-00031-of-00086.safetensors", "model.layers.45.self_attn.k_proj.weight_scale": "model-00031-of-00086.safetensors", "model.layers.45.self_attn.o_proj.input_scale": "model-00031-of-00086.safetensors", @@ -1544,6 +1677,7 @@ "model.layers.45.self_attn.q_proj.weight": "model-00031-of-00086.safetensors", "model.layers.45.self_attn.q_proj.weight_scale": "model-00031-of-00086.safetensors", "model.layers.45.self_attn.v_proj.input_scale": "model-00031-of-00086.safetensors", + "model.layers.45.self_attn.v_proj.v_scale": "model-00031-of-00086.safetensors", "model.layers.45.self_attn.v_proj.weight": "model-00031-of-00086.safetensors", "model.layers.45.self_attn.v_proj.weight_scale": "model-00031-of-00086.safetensors", "model.layers.46.input_layernorm.weight": "model-00033-of-00086.safetensors", @@ -1558,6 +1692,7 @@ "model.layers.46.mlp.up_proj.weight_scale": "model-00032-of-00086.safetensors", "model.layers.46.post_attention_layernorm.weight": "model-00033-of-00086.safetensors", "model.layers.46.self_attn.k_proj.input_scale": "model-00032-of-00086.safetensors", + "model.layers.46.self_attn.k_proj.k_scale": "model-00032-of-00086.safetensors", "model.layers.46.self_attn.k_proj.weight": "model-00032-of-00086.safetensors", "model.layers.46.self_attn.k_proj.weight_scale": "model-00032-of-00086.safetensors", "model.layers.46.self_attn.o_proj.input_scale": "model-00032-of-00086.safetensors", @@ -1567,6 +1702,7 @@ "model.layers.46.self_attn.q_proj.weight": "model-00032-of-00086.safetensors", "model.layers.46.self_attn.q_proj.weight_scale": "model-00032-of-00086.safetensors", "model.layers.46.self_attn.v_proj.input_scale": "model-00032-of-00086.safetensors", + "model.layers.46.self_attn.v_proj.v_scale": "model-00032-of-00086.safetensors", "model.layers.46.self_attn.v_proj.weight": "model-00032-of-00086.safetensors", "model.layers.46.self_attn.v_proj.weight_scale": "model-00032-of-00086.safetensors", "model.layers.47.input_layernorm.weight": "model-00033-of-00086.safetensors", @@ -1581,6 +1717,7 @@ "model.layers.47.mlp.up_proj.weight_scale": "model-00033-of-00086.safetensors", "model.layers.47.post_attention_layernorm.weight": "model-00033-of-00086.safetensors", "model.layers.47.self_attn.k_proj.input_scale": "model-00033-of-00086.safetensors", + "model.layers.47.self_attn.k_proj.k_scale": "model-00033-of-00086.safetensors", "model.layers.47.self_attn.k_proj.weight": "model-00033-of-00086.safetensors", "model.layers.47.self_attn.k_proj.weight_scale": "model-00033-of-00086.safetensors", "model.layers.47.self_attn.o_proj.input_scale": "model-00033-of-00086.safetensors", @@ -1590,6 +1727,7 @@ "model.layers.47.self_attn.q_proj.weight": "model-00033-of-00086.safetensors", "model.layers.47.self_attn.q_proj.weight_scale": "model-00033-of-00086.safetensors", "model.layers.47.self_attn.v_proj.input_scale": "model-00033-of-00086.safetensors", + "model.layers.47.self_attn.v_proj.v_scale": "model-00033-of-00086.safetensors", "model.layers.47.self_attn.v_proj.weight": "model-00033-of-00086.safetensors", "model.layers.47.self_attn.v_proj.weight_scale": "model-00033-of-00086.safetensors", "model.layers.48.input_layernorm.weight": "model-00034-of-00086.safetensors", @@ -1604,6 +1742,7 @@ "model.layers.48.mlp.up_proj.weight_scale": "model-00034-of-00086.safetensors", "model.layers.48.post_attention_layernorm.weight": "model-00034-of-00086.safetensors", "model.layers.48.self_attn.k_proj.input_scale": "model-00033-of-00086.safetensors", + "model.layers.48.self_attn.k_proj.k_scale": "model-00033-of-00086.safetensors", "model.layers.48.self_attn.k_proj.weight": "model-00033-of-00086.safetensors", "model.layers.48.self_attn.k_proj.weight_scale": "model-00033-of-00086.safetensors", "model.layers.48.self_attn.o_proj.input_scale": "model-00033-of-00086.safetensors", @@ -1613,6 +1752,7 @@ "model.layers.48.self_attn.q_proj.weight": "model-00033-of-00086.safetensors", "model.layers.48.self_attn.q_proj.weight_scale": "model-00033-of-00086.safetensors", "model.layers.48.self_attn.v_proj.input_scale": "model-00033-of-00086.safetensors", + "model.layers.48.self_attn.v_proj.v_scale": "model-00033-of-00086.safetensors", "model.layers.48.self_attn.v_proj.weight": "model-00033-of-00086.safetensors", "model.layers.48.self_attn.v_proj.weight_scale": "model-00033-of-00086.safetensors", "model.layers.49.input_layernorm.weight": "model-00035-of-00086.safetensors", @@ -1627,6 +1767,7 @@ "model.layers.49.mlp.up_proj.weight_scale": "model-00034-of-00086.safetensors", "model.layers.49.post_attention_layernorm.weight": "model-00035-of-00086.safetensors", "model.layers.49.self_attn.k_proj.input_scale": "model-00034-of-00086.safetensors", + "model.layers.49.self_attn.k_proj.k_scale": "model-00034-of-00086.safetensors", "model.layers.49.self_attn.k_proj.weight": "model-00034-of-00086.safetensors", "model.layers.49.self_attn.k_proj.weight_scale": "model-00034-of-00086.safetensors", "model.layers.49.self_attn.o_proj.input_scale": "model-00034-of-00086.safetensors", @@ -1636,6 +1777,7 @@ "model.layers.49.self_attn.q_proj.weight": "model-00034-of-00086.safetensors", "model.layers.49.self_attn.q_proj.weight_scale": "model-00034-of-00086.safetensors", "model.layers.49.self_attn.v_proj.input_scale": "model-00034-of-00086.safetensors", + "model.layers.49.self_attn.v_proj.v_scale": "model-00034-of-00086.safetensors", "model.layers.49.self_attn.v_proj.weight": "model-00034-of-00086.safetensors", "model.layers.49.self_attn.v_proj.weight_scale": "model-00034-of-00086.safetensors", "model.layers.5.input_layernorm.weight": "model-00005-of-00086.safetensors", @@ -1650,6 +1792,7 @@ "model.layers.5.mlp.up_proj.weight_scale": "model-00005-of-00086.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00005-of-00086.safetensors", "model.layers.5.self_attn.k_proj.input_scale": "model-00005-of-00086.safetensors", + "model.layers.5.self_attn.k_proj.k_scale": "model-00005-of-00086.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00005-of-00086.safetensors", "model.layers.5.self_attn.k_proj.weight_scale": "model-00005-of-00086.safetensors", "model.layers.5.self_attn.o_proj.input_scale": "model-00005-of-00086.safetensors", @@ -1659,6 +1802,7 @@ "model.layers.5.self_attn.q_proj.weight": "model-00005-of-00086.safetensors", "model.layers.5.self_attn.q_proj.weight_scale": "model-00005-of-00086.safetensors", "model.layers.5.self_attn.v_proj.input_scale": "model-00005-of-00086.safetensors", + "model.layers.5.self_attn.v_proj.v_scale": "model-00005-of-00086.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00005-of-00086.safetensors", "model.layers.5.self_attn.v_proj.weight_scale": "model-00005-of-00086.safetensors", "model.layers.50.input_layernorm.weight": "model-00035-of-00086.safetensors", @@ -1673,6 +1817,7 @@ "model.layers.50.mlp.up_proj.weight_scale": "model-00035-of-00086.safetensors", "model.layers.50.post_attention_layernorm.weight": "model-00035-of-00086.safetensors", "model.layers.50.self_attn.k_proj.input_scale": "model-00035-of-00086.safetensors", + "model.layers.50.self_attn.k_proj.k_scale": "model-00035-of-00086.safetensors", "model.layers.50.self_attn.k_proj.weight": "model-00035-of-00086.safetensors", "model.layers.50.self_attn.k_proj.weight_scale": "model-00035-of-00086.safetensors", "model.layers.50.self_attn.o_proj.input_scale": "model-00035-of-00086.safetensors", @@ -1682,6 +1827,7 @@ "model.layers.50.self_attn.q_proj.weight": "model-00035-of-00086.safetensors", "model.layers.50.self_attn.q_proj.weight_scale": "model-00035-of-00086.safetensors", "model.layers.50.self_attn.v_proj.input_scale": "model-00035-of-00086.safetensors", + "model.layers.50.self_attn.v_proj.v_scale": "model-00035-of-00086.safetensors", "model.layers.50.self_attn.v_proj.weight": "model-00035-of-00086.safetensors", "model.layers.50.self_attn.v_proj.weight_scale": "model-00035-of-00086.safetensors", "model.layers.51.input_layernorm.weight": "model-00036-of-00086.safetensors", @@ -1696,6 +1842,7 @@ "model.layers.51.mlp.up_proj.weight_scale": "model-00036-of-00086.safetensors", "model.layers.51.post_attention_layernorm.weight": "model-00036-of-00086.safetensors", "model.layers.51.self_attn.k_proj.input_scale": "model-00035-of-00086.safetensors", + "model.layers.51.self_attn.k_proj.k_scale": "model-00035-of-00086.safetensors", "model.layers.51.self_attn.k_proj.weight": "model-00035-of-00086.safetensors", "model.layers.51.self_attn.k_proj.weight_scale": "model-00035-of-00086.safetensors", "model.layers.51.self_attn.o_proj.input_scale": "model-00035-of-00086.safetensors", @@ -1705,6 +1852,7 @@ "model.layers.51.self_attn.q_proj.weight": "model-00035-of-00086.safetensors", "model.layers.51.self_attn.q_proj.weight_scale": "model-00035-of-00086.safetensors", "model.layers.51.self_attn.v_proj.input_scale": "model-00035-of-00086.safetensors", + "model.layers.51.self_attn.v_proj.v_scale": "model-00035-of-00086.safetensors", "model.layers.51.self_attn.v_proj.weight": "model-00035-of-00086.safetensors", "model.layers.51.self_attn.v_proj.weight_scale": "model-00035-of-00086.safetensors", "model.layers.52.input_layernorm.weight": "model-00037-of-00086.safetensors", @@ -1719,6 +1867,7 @@ "model.layers.52.mlp.up_proj.weight_scale": "model-00036-of-00086.safetensors", "model.layers.52.post_attention_layernorm.weight": "model-00037-of-00086.safetensors", "model.layers.52.self_attn.k_proj.input_scale": "model-00036-of-00086.safetensors", + "model.layers.52.self_attn.k_proj.k_scale": "model-00036-of-00086.safetensors", "model.layers.52.self_attn.k_proj.weight": "model-00036-of-00086.safetensors", "model.layers.52.self_attn.k_proj.weight_scale": "model-00036-of-00086.safetensors", "model.layers.52.self_attn.o_proj.input_scale": "model-00036-of-00086.safetensors", @@ -1728,6 +1877,7 @@ "model.layers.52.self_attn.q_proj.weight": "model-00036-of-00086.safetensors", "model.layers.52.self_attn.q_proj.weight_scale": "model-00036-of-00086.safetensors", "model.layers.52.self_attn.v_proj.input_scale": "model-00036-of-00086.safetensors", + "model.layers.52.self_attn.v_proj.v_scale": "model-00036-of-00086.safetensors", "model.layers.52.self_attn.v_proj.weight": "model-00036-of-00086.safetensors", "model.layers.52.self_attn.v_proj.weight_scale": "model-00036-of-00086.safetensors", "model.layers.53.input_layernorm.weight": "model-00037-of-00086.safetensors", @@ -1742,6 +1892,7 @@ "model.layers.53.mlp.up_proj.weight_scale": "model-00037-of-00086.safetensors", "model.layers.53.post_attention_layernorm.weight": "model-00037-of-00086.safetensors", "model.layers.53.self_attn.k_proj.input_scale": "model-00037-of-00086.safetensors", + "model.layers.53.self_attn.k_proj.k_scale": "model-00037-of-00086.safetensors", "model.layers.53.self_attn.k_proj.weight": "model-00037-of-00086.safetensors", "model.layers.53.self_attn.k_proj.weight_scale": "model-00037-of-00086.safetensors", "model.layers.53.self_attn.o_proj.input_scale": "model-00037-of-00086.safetensors", @@ -1751,6 +1902,7 @@ "model.layers.53.self_attn.q_proj.weight": "model-00037-of-00086.safetensors", "model.layers.53.self_attn.q_proj.weight_scale": "model-00037-of-00086.safetensors", "model.layers.53.self_attn.v_proj.input_scale": "model-00037-of-00086.safetensors", + "model.layers.53.self_attn.v_proj.v_scale": "model-00037-of-00086.safetensors", "model.layers.53.self_attn.v_proj.weight": "model-00037-of-00086.safetensors", "model.layers.53.self_attn.v_proj.weight_scale": "model-00037-of-00086.safetensors", "model.layers.54.input_layernorm.weight": "model-00038-of-00086.safetensors", @@ -1765,6 +1917,7 @@ "model.layers.54.mlp.up_proj.weight_scale": "model-00038-of-00086.safetensors", "model.layers.54.post_attention_layernorm.weight": "model-00038-of-00086.safetensors", "model.layers.54.self_attn.k_proj.input_scale": "model-00037-of-00086.safetensors", + "model.layers.54.self_attn.k_proj.k_scale": "model-00037-of-00086.safetensors", "model.layers.54.self_attn.k_proj.weight": "model-00037-of-00086.safetensors", "model.layers.54.self_attn.k_proj.weight_scale": "model-00037-of-00086.safetensors", "model.layers.54.self_attn.o_proj.input_scale": "model-00037-of-00086.safetensors", @@ -1774,6 +1927,7 @@ "model.layers.54.self_attn.q_proj.weight": "model-00037-of-00086.safetensors", "model.layers.54.self_attn.q_proj.weight_scale": "model-00037-of-00086.safetensors", "model.layers.54.self_attn.v_proj.input_scale": "model-00037-of-00086.safetensors", + "model.layers.54.self_attn.v_proj.v_scale": "model-00037-of-00086.safetensors", "model.layers.54.self_attn.v_proj.weight": "model-00037-of-00086.safetensors", "model.layers.54.self_attn.v_proj.weight_scale": "model-00037-of-00086.safetensors", "model.layers.55.input_layernorm.weight": "model-00039-of-00086.safetensors", @@ -1788,6 +1942,7 @@ "model.layers.55.mlp.up_proj.weight_scale": "model-00038-of-00086.safetensors", "model.layers.55.post_attention_layernorm.weight": "model-00039-of-00086.safetensors", "model.layers.55.self_attn.k_proj.input_scale": "model-00038-of-00086.safetensors", + "model.layers.55.self_attn.k_proj.k_scale": "model-00038-of-00086.safetensors", "model.layers.55.self_attn.k_proj.weight": "model-00038-of-00086.safetensors", "model.layers.55.self_attn.k_proj.weight_scale": "model-00038-of-00086.safetensors", "model.layers.55.self_attn.o_proj.input_scale": "model-00038-of-00086.safetensors", @@ -1797,6 +1952,7 @@ "model.layers.55.self_attn.q_proj.weight": "model-00038-of-00086.safetensors", "model.layers.55.self_attn.q_proj.weight_scale": "model-00038-of-00086.safetensors", "model.layers.55.self_attn.v_proj.input_scale": "model-00038-of-00086.safetensors", + "model.layers.55.self_attn.v_proj.v_scale": "model-00038-of-00086.safetensors", "model.layers.55.self_attn.v_proj.weight": "model-00038-of-00086.safetensors", "model.layers.55.self_attn.v_proj.weight_scale": "model-00038-of-00086.safetensors", "model.layers.56.input_layernorm.weight": "model-00039-of-00086.safetensors", @@ -1811,6 +1967,7 @@ "model.layers.56.mlp.up_proj.weight_scale": "model-00039-of-00086.safetensors", "model.layers.56.post_attention_layernorm.weight": "model-00039-of-00086.safetensors", "model.layers.56.self_attn.k_proj.input_scale": "model-00039-of-00086.safetensors", + "model.layers.56.self_attn.k_proj.k_scale": "model-00039-of-00086.safetensors", "model.layers.56.self_attn.k_proj.weight": "model-00039-of-00086.safetensors", "model.layers.56.self_attn.k_proj.weight_scale": "model-00039-of-00086.safetensors", "model.layers.56.self_attn.o_proj.input_scale": "model-00039-of-00086.safetensors", @@ -1820,6 +1977,7 @@ "model.layers.56.self_attn.q_proj.weight": "model-00039-of-00086.safetensors", "model.layers.56.self_attn.q_proj.weight_scale": "model-00039-of-00086.safetensors", "model.layers.56.self_attn.v_proj.input_scale": "model-00039-of-00086.safetensors", + "model.layers.56.self_attn.v_proj.v_scale": "model-00039-of-00086.safetensors", "model.layers.56.self_attn.v_proj.weight": "model-00039-of-00086.safetensors", "model.layers.56.self_attn.v_proj.weight_scale": "model-00039-of-00086.safetensors", "model.layers.57.input_layernorm.weight": "model-00040-of-00086.safetensors", @@ -1834,6 +1992,7 @@ "model.layers.57.mlp.up_proj.weight_scale": "model-00040-of-00086.safetensors", "model.layers.57.post_attention_layernorm.weight": "model-00040-of-00086.safetensors", "model.layers.57.self_attn.k_proj.input_scale": "model-00039-of-00086.safetensors", + "model.layers.57.self_attn.k_proj.k_scale": "model-00039-of-00086.safetensors", "model.layers.57.self_attn.k_proj.weight": "model-00039-of-00086.safetensors", "model.layers.57.self_attn.k_proj.weight_scale": "model-00039-of-00086.safetensors", "model.layers.57.self_attn.o_proj.input_scale": "model-00039-of-00086.safetensors", @@ -1843,6 +2002,7 @@ "model.layers.57.self_attn.q_proj.weight": "model-00039-of-00086.safetensors", "model.layers.57.self_attn.q_proj.weight_scale": "model-00039-of-00086.safetensors", "model.layers.57.self_attn.v_proj.input_scale": "model-00039-of-00086.safetensors", + "model.layers.57.self_attn.v_proj.v_scale": "model-00039-of-00086.safetensors", "model.layers.57.self_attn.v_proj.weight": "model-00039-of-00086.safetensors", "model.layers.57.self_attn.v_proj.weight_scale": "model-00039-of-00086.safetensors", "model.layers.58.input_layernorm.weight": "model-00041-of-00086.safetensors", @@ -1857,6 +2017,7 @@ "model.layers.58.mlp.up_proj.weight_scale": "model-00040-of-00086.safetensors", "model.layers.58.post_attention_layernorm.weight": "model-00041-of-00086.safetensors", "model.layers.58.self_attn.k_proj.input_scale": "model-00040-of-00086.safetensors", + "model.layers.58.self_attn.k_proj.k_scale": "model-00040-of-00086.safetensors", "model.layers.58.self_attn.k_proj.weight": "model-00040-of-00086.safetensors", "model.layers.58.self_attn.k_proj.weight_scale": "model-00040-of-00086.safetensors", "model.layers.58.self_attn.o_proj.input_scale": "model-00040-of-00086.safetensors", @@ -1866,6 +2027,7 @@ "model.layers.58.self_attn.q_proj.weight": "model-00040-of-00086.safetensors", "model.layers.58.self_attn.q_proj.weight_scale": "model-00040-of-00086.safetensors", "model.layers.58.self_attn.v_proj.input_scale": "model-00040-of-00086.safetensors", + "model.layers.58.self_attn.v_proj.v_scale": "model-00040-of-00086.safetensors", "model.layers.58.self_attn.v_proj.weight": "model-00040-of-00086.safetensors", "model.layers.58.self_attn.v_proj.weight_scale": "model-00040-of-00086.safetensors", "model.layers.59.input_layernorm.weight": "model-00041-of-00086.safetensors", @@ -1880,6 +2042,7 @@ "model.layers.59.mlp.up_proj.weight_scale": "model-00041-of-00086.safetensors", "model.layers.59.post_attention_layernorm.weight": "model-00041-of-00086.safetensors", "model.layers.59.self_attn.k_proj.input_scale": "model-00041-of-00086.safetensors", + "model.layers.59.self_attn.k_proj.k_scale": "model-00041-of-00086.safetensors", "model.layers.59.self_attn.k_proj.weight": "model-00041-of-00086.safetensors", "model.layers.59.self_attn.k_proj.weight_scale": "model-00041-of-00086.safetensors", "model.layers.59.self_attn.o_proj.input_scale": "model-00041-of-00086.safetensors", @@ -1889,6 +2052,7 @@ "model.layers.59.self_attn.q_proj.weight": "model-00041-of-00086.safetensors", "model.layers.59.self_attn.q_proj.weight_scale": "model-00041-of-00086.safetensors", "model.layers.59.self_attn.v_proj.input_scale": "model-00041-of-00086.safetensors", + "model.layers.59.self_attn.v_proj.v_scale": "model-00041-of-00086.safetensors", "model.layers.59.self_attn.v_proj.weight": "model-00041-of-00086.safetensors", "model.layers.59.self_attn.v_proj.weight_scale": "model-00041-of-00086.safetensors", "model.layers.6.input_layernorm.weight": "model-00006-of-00086.safetensors", @@ -1903,6 +2067,7 @@ "model.layers.6.mlp.up_proj.weight_scale": "model-00006-of-00086.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00006-of-00086.safetensors", "model.layers.6.self_attn.k_proj.input_scale": "model-00005-of-00086.safetensors", + "model.layers.6.self_attn.k_proj.k_scale": "model-00005-of-00086.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00005-of-00086.safetensors", "model.layers.6.self_attn.k_proj.weight_scale": "model-00005-of-00086.safetensors", "model.layers.6.self_attn.o_proj.input_scale": "model-00005-of-00086.safetensors", @@ -1912,6 +2077,7 @@ "model.layers.6.self_attn.q_proj.weight": "model-00005-of-00086.safetensors", "model.layers.6.self_attn.q_proj.weight_scale": "model-00005-of-00086.safetensors", "model.layers.6.self_attn.v_proj.input_scale": "model-00005-of-00086.safetensors", + "model.layers.6.self_attn.v_proj.v_scale": "model-00005-of-00086.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00005-of-00086.safetensors", "model.layers.6.self_attn.v_proj.weight_scale": "model-00005-of-00086.safetensors", "model.layers.60.input_layernorm.weight": "model-00042-of-00086.safetensors", @@ -1926,6 +2092,7 @@ "model.layers.60.mlp.up_proj.weight_scale": "model-00042-of-00086.safetensors", "model.layers.60.post_attention_layernorm.weight": "model-00042-of-00086.safetensors", "model.layers.60.self_attn.k_proj.input_scale": "model-00041-of-00086.safetensors", + "model.layers.60.self_attn.k_proj.k_scale": "model-00041-of-00086.safetensors", "model.layers.60.self_attn.k_proj.weight": "model-00041-of-00086.safetensors", "model.layers.60.self_attn.k_proj.weight_scale": "model-00041-of-00086.safetensors", "model.layers.60.self_attn.o_proj.input_scale": "model-00041-of-00086.safetensors", @@ -1935,6 +2102,7 @@ "model.layers.60.self_attn.q_proj.weight": "model-00041-of-00086.safetensors", "model.layers.60.self_attn.q_proj.weight_scale": "model-00041-of-00086.safetensors", "model.layers.60.self_attn.v_proj.input_scale": "model-00041-of-00086.safetensors", + "model.layers.60.self_attn.v_proj.v_scale": "model-00041-of-00086.safetensors", "model.layers.60.self_attn.v_proj.weight": "model-00041-of-00086.safetensors", "model.layers.60.self_attn.v_proj.weight_scale": "model-00041-of-00086.safetensors", "model.layers.61.input_layernorm.weight": "model-00043-of-00086.safetensors", @@ -1949,6 +2117,7 @@ "model.layers.61.mlp.up_proj.weight_scale": "model-00042-of-00086.safetensors", "model.layers.61.post_attention_layernorm.weight": "model-00043-of-00086.safetensors", "model.layers.61.self_attn.k_proj.input_scale": "model-00042-of-00086.safetensors", + "model.layers.61.self_attn.k_proj.k_scale": "model-00042-of-00086.safetensors", "model.layers.61.self_attn.k_proj.weight": "model-00042-of-00086.safetensors", "model.layers.61.self_attn.k_proj.weight_scale": "model-00042-of-00086.safetensors", "model.layers.61.self_attn.o_proj.input_scale": "model-00042-of-00086.safetensors", @@ -1958,6 +2127,7 @@ "model.layers.61.self_attn.q_proj.weight": "model-00042-of-00086.safetensors", "model.layers.61.self_attn.q_proj.weight_scale": "model-00042-of-00086.safetensors", "model.layers.61.self_attn.v_proj.input_scale": "model-00042-of-00086.safetensors", + "model.layers.61.self_attn.v_proj.v_scale": "model-00042-of-00086.safetensors", "model.layers.61.self_attn.v_proj.weight": "model-00042-of-00086.safetensors", "model.layers.61.self_attn.v_proj.weight_scale": "model-00042-of-00086.safetensors", "model.layers.62.input_layernorm.weight": "model-00043-of-00086.safetensors", @@ -1972,6 +2142,7 @@ "model.layers.62.mlp.up_proj.weight_scale": "model-00043-of-00086.safetensors", "model.layers.62.post_attention_layernorm.weight": "model-00043-of-00086.safetensors", "model.layers.62.self_attn.k_proj.input_scale": "model-00043-of-00086.safetensors", + "model.layers.62.self_attn.k_proj.k_scale": "model-00043-of-00086.safetensors", "model.layers.62.self_attn.k_proj.weight": "model-00043-of-00086.safetensors", "model.layers.62.self_attn.k_proj.weight_scale": "model-00043-of-00086.safetensors", "model.layers.62.self_attn.o_proj.input_scale": "model-00043-of-00086.safetensors", @@ -1981,6 +2152,7 @@ "model.layers.62.self_attn.q_proj.weight": "model-00043-of-00086.safetensors", "model.layers.62.self_attn.q_proj.weight_scale": "model-00043-of-00086.safetensors", "model.layers.62.self_attn.v_proj.input_scale": "model-00043-of-00086.safetensors", + "model.layers.62.self_attn.v_proj.v_scale": "model-00043-of-00086.safetensors", "model.layers.62.self_attn.v_proj.weight": "model-00043-of-00086.safetensors", "model.layers.62.self_attn.v_proj.weight_scale": "model-00043-of-00086.safetensors", "model.layers.63.input_layernorm.weight": "model-00044-of-00086.safetensors", @@ -1995,6 +2167,7 @@ "model.layers.63.mlp.up_proj.weight_scale": "model-00044-of-00086.safetensors", "model.layers.63.post_attention_layernorm.weight": "model-00044-of-00086.safetensors", "model.layers.63.self_attn.k_proj.input_scale": "model-00043-of-00086.safetensors", + "model.layers.63.self_attn.k_proj.k_scale": "model-00043-of-00086.safetensors", "model.layers.63.self_attn.k_proj.weight": "model-00043-of-00086.safetensors", "model.layers.63.self_attn.k_proj.weight_scale": "model-00043-of-00086.safetensors", "model.layers.63.self_attn.o_proj.input_scale": "model-00043-of-00086.safetensors", @@ -2004,6 +2177,7 @@ "model.layers.63.self_attn.q_proj.weight": "model-00043-of-00086.safetensors", "model.layers.63.self_attn.q_proj.weight_scale": "model-00043-of-00086.safetensors", "model.layers.63.self_attn.v_proj.input_scale": "model-00043-of-00086.safetensors", + "model.layers.63.self_attn.v_proj.v_scale": "model-00043-of-00086.safetensors", "model.layers.63.self_attn.v_proj.weight": "model-00043-of-00086.safetensors", "model.layers.63.self_attn.v_proj.weight_scale": "model-00043-of-00086.safetensors", "model.layers.64.input_layernorm.weight": "model-00045-of-00086.safetensors", @@ -2018,6 +2192,7 @@ "model.layers.64.mlp.up_proj.weight_scale": "model-00044-of-00086.safetensors", "model.layers.64.post_attention_layernorm.weight": "model-00045-of-00086.safetensors", "model.layers.64.self_attn.k_proj.input_scale": "model-00044-of-00086.safetensors", + "model.layers.64.self_attn.k_proj.k_scale": "model-00044-of-00086.safetensors", "model.layers.64.self_attn.k_proj.weight": "model-00044-of-00086.safetensors", "model.layers.64.self_attn.k_proj.weight_scale": "model-00044-of-00086.safetensors", "model.layers.64.self_attn.o_proj.input_scale": "model-00044-of-00086.safetensors", @@ -2027,6 +2202,7 @@ "model.layers.64.self_attn.q_proj.weight": "model-00044-of-00086.safetensors", "model.layers.64.self_attn.q_proj.weight_scale": "model-00044-of-00086.safetensors", "model.layers.64.self_attn.v_proj.input_scale": "model-00044-of-00086.safetensors", + "model.layers.64.self_attn.v_proj.v_scale": "model-00044-of-00086.safetensors", "model.layers.64.self_attn.v_proj.weight": "model-00044-of-00086.safetensors", "model.layers.64.self_attn.v_proj.weight_scale": "model-00044-of-00086.safetensors", "model.layers.65.input_layernorm.weight": "model-00045-of-00086.safetensors", @@ -2041,6 +2217,7 @@ "model.layers.65.mlp.up_proj.weight_scale": "model-00045-of-00086.safetensors", "model.layers.65.post_attention_layernorm.weight": "model-00045-of-00086.safetensors", "model.layers.65.self_attn.k_proj.input_scale": "model-00045-of-00086.safetensors", + "model.layers.65.self_attn.k_proj.k_scale": "model-00045-of-00086.safetensors", "model.layers.65.self_attn.k_proj.weight": "model-00045-of-00086.safetensors", "model.layers.65.self_attn.k_proj.weight_scale": "model-00045-of-00086.safetensors", "model.layers.65.self_attn.o_proj.input_scale": "model-00045-of-00086.safetensors", @@ -2050,6 +2227,7 @@ "model.layers.65.self_attn.q_proj.weight": "model-00045-of-00086.safetensors", "model.layers.65.self_attn.q_proj.weight_scale": "model-00045-of-00086.safetensors", "model.layers.65.self_attn.v_proj.input_scale": "model-00045-of-00086.safetensors", + "model.layers.65.self_attn.v_proj.v_scale": "model-00045-of-00086.safetensors", "model.layers.65.self_attn.v_proj.weight": "model-00045-of-00086.safetensors", "model.layers.65.self_attn.v_proj.weight_scale": "model-00045-of-00086.safetensors", "model.layers.66.input_layernorm.weight": "model-00046-of-00086.safetensors", @@ -2064,6 +2242,7 @@ "model.layers.66.mlp.up_proj.weight_scale": "model-00046-of-00086.safetensors", "model.layers.66.post_attention_layernorm.weight": "model-00046-of-00086.safetensors", "model.layers.66.self_attn.k_proj.input_scale": "model-00045-of-00086.safetensors", + "model.layers.66.self_attn.k_proj.k_scale": "model-00045-of-00086.safetensors", "model.layers.66.self_attn.k_proj.weight": "model-00045-of-00086.safetensors", "model.layers.66.self_attn.k_proj.weight_scale": "model-00045-of-00086.safetensors", "model.layers.66.self_attn.o_proj.input_scale": "model-00045-of-00086.safetensors", @@ -2073,6 +2252,7 @@ "model.layers.66.self_attn.q_proj.weight": "model-00045-of-00086.safetensors", "model.layers.66.self_attn.q_proj.weight_scale": "model-00045-of-00086.safetensors", "model.layers.66.self_attn.v_proj.input_scale": "model-00045-of-00086.safetensors", + "model.layers.66.self_attn.v_proj.v_scale": "model-00045-of-00086.safetensors", "model.layers.66.self_attn.v_proj.weight": "model-00045-of-00086.safetensors", "model.layers.66.self_attn.v_proj.weight_scale": "model-00045-of-00086.safetensors", "model.layers.67.input_layernorm.weight": "model-00047-of-00086.safetensors", @@ -2087,6 +2267,7 @@ "model.layers.67.mlp.up_proj.weight_scale": "model-00046-of-00086.safetensors", "model.layers.67.post_attention_layernorm.weight": "model-00047-of-00086.safetensors", "model.layers.67.self_attn.k_proj.input_scale": "model-00046-of-00086.safetensors", + "model.layers.67.self_attn.k_proj.k_scale": "model-00046-of-00086.safetensors", "model.layers.67.self_attn.k_proj.weight": "model-00046-of-00086.safetensors", "model.layers.67.self_attn.k_proj.weight_scale": "model-00046-of-00086.safetensors", "model.layers.67.self_attn.o_proj.input_scale": "model-00046-of-00086.safetensors", @@ -2096,6 +2277,7 @@ "model.layers.67.self_attn.q_proj.weight": "model-00046-of-00086.safetensors", "model.layers.67.self_attn.q_proj.weight_scale": "model-00046-of-00086.safetensors", "model.layers.67.self_attn.v_proj.input_scale": "model-00046-of-00086.safetensors", + "model.layers.67.self_attn.v_proj.v_scale": "model-00046-of-00086.safetensors", "model.layers.67.self_attn.v_proj.weight": "model-00046-of-00086.safetensors", "model.layers.67.self_attn.v_proj.weight_scale": "model-00046-of-00086.safetensors", "model.layers.68.input_layernorm.weight": "model-00047-of-00086.safetensors", @@ -2110,6 +2292,7 @@ "model.layers.68.mlp.up_proj.weight_scale": "model-00047-of-00086.safetensors", "model.layers.68.post_attention_layernorm.weight": "model-00047-of-00086.safetensors", "model.layers.68.self_attn.k_proj.input_scale": "model-00047-of-00086.safetensors", + "model.layers.68.self_attn.k_proj.k_scale": "model-00047-of-00086.safetensors", "model.layers.68.self_attn.k_proj.weight": "model-00047-of-00086.safetensors", "model.layers.68.self_attn.k_proj.weight_scale": "model-00047-of-00086.safetensors", "model.layers.68.self_attn.o_proj.input_scale": "model-00047-of-00086.safetensors", @@ -2119,6 +2302,7 @@ "model.layers.68.self_attn.q_proj.weight": "model-00047-of-00086.safetensors", "model.layers.68.self_attn.q_proj.weight_scale": "model-00047-of-00086.safetensors", "model.layers.68.self_attn.v_proj.input_scale": "model-00047-of-00086.safetensors", + "model.layers.68.self_attn.v_proj.v_scale": "model-00047-of-00086.safetensors", "model.layers.68.self_attn.v_proj.weight": "model-00047-of-00086.safetensors", "model.layers.68.self_attn.v_proj.weight_scale": "model-00047-of-00086.safetensors", "model.layers.69.input_layernorm.weight": "model-00048-of-00086.safetensors", @@ -2133,6 +2317,7 @@ "model.layers.69.mlp.up_proj.weight_scale": "model-00048-of-00086.safetensors", "model.layers.69.post_attention_layernorm.weight": "model-00048-of-00086.safetensors", "model.layers.69.self_attn.k_proj.input_scale": "model-00047-of-00086.safetensors", + "model.layers.69.self_attn.k_proj.k_scale": "model-00047-of-00086.safetensors", "model.layers.69.self_attn.k_proj.weight": "model-00047-of-00086.safetensors", "model.layers.69.self_attn.k_proj.weight_scale": "model-00047-of-00086.safetensors", "model.layers.69.self_attn.o_proj.input_scale": "model-00047-of-00086.safetensors", @@ -2142,6 +2327,7 @@ "model.layers.69.self_attn.q_proj.weight": "model-00047-of-00086.safetensors", "model.layers.69.self_attn.q_proj.weight_scale": "model-00047-of-00086.safetensors", "model.layers.69.self_attn.v_proj.input_scale": "model-00047-of-00086.safetensors", + "model.layers.69.self_attn.v_proj.v_scale": "model-00047-of-00086.safetensors", "model.layers.69.self_attn.v_proj.weight": "model-00047-of-00086.safetensors", "model.layers.69.self_attn.v_proj.weight_scale": "model-00047-of-00086.safetensors", "model.layers.7.input_layernorm.weight": "model-00007-of-00086.safetensors", @@ -2156,6 +2342,7 @@ "model.layers.7.mlp.up_proj.weight_scale": "model-00006-of-00086.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00007-of-00086.safetensors", "model.layers.7.self_attn.k_proj.input_scale": "model-00006-of-00086.safetensors", + "model.layers.7.self_attn.k_proj.k_scale": "model-00006-of-00086.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00006-of-00086.safetensors", "model.layers.7.self_attn.k_proj.weight_scale": "model-00006-of-00086.safetensors", "model.layers.7.self_attn.o_proj.input_scale": "model-00006-of-00086.safetensors", @@ -2165,6 +2352,7 @@ "model.layers.7.self_attn.q_proj.weight": "model-00006-of-00086.safetensors", "model.layers.7.self_attn.q_proj.weight_scale": "model-00006-of-00086.safetensors", "model.layers.7.self_attn.v_proj.input_scale": "model-00006-of-00086.safetensors", + "model.layers.7.self_attn.v_proj.v_scale": "model-00006-of-00086.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00006-of-00086.safetensors", "model.layers.7.self_attn.v_proj.weight_scale": "model-00006-of-00086.safetensors", "model.layers.70.input_layernorm.weight": "model-00049-of-00086.safetensors", @@ -2179,6 +2367,7 @@ "model.layers.70.mlp.up_proj.weight_scale": "model-00048-of-00086.safetensors", "model.layers.70.post_attention_layernorm.weight": "model-00049-of-00086.safetensors", "model.layers.70.self_attn.k_proj.input_scale": "model-00048-of-00086.safetensors", + "model.layers.70.self_attn.k_proj.k_scale": "model-00048-of-00086.safetensors", "model.layers.70.self_attn.k_proj.weight": "model-00048-of-00086.safetensors", "model.layers.70.self_attn.k_proj.weight_scale": "model-00048-of-00086.safetensors", "model.layers.70.self_attn.o_proj.input_scale": "model-00048-of-00086.safetensors", @@ -2188,6 +2377,7 @@ "model.layers.70.self_attn.q_proj.weight": "model-00048-of-00086.safetensors", "model.layers.70.self_attn.q_proj.weight_scale": "model-00048-of-00086.safetensors", "model.layers.70.self_attn.v_proj.input_scale": "model-00048-of-00086.safetensors", + "model.layers.70.self_attn.v_proj.v_scale": "model-00048-of-00086.safetensors", "model.layers.70.self_attn.v_proj.weight": "model-00048-of-00086.safetensors", "model.layers.70.self_attn.v_proj.weight_scale": "model-00048-of-00086.safetensors", "model.layers.71.input_layernorm.weight": "model-00049-of-00086.safetensors", @@ -2202,6 +2392,7 @@ "model.layers.71.mlp.up_proj.weight_scale": "model-00049-of-00086.safetensors", "model.layers.71.post_attention_layernorm.weight": "model-00049-of-00086.safetensors", "model.layers.71.self_attn.k_proj.input_scale": "model-00049-of-00086.safetensors", + "model.layers.71.self_attn.k_proj.k_scale": "model-00049-of-00086.safetensors", "model.layers.71.self_attn.k_proj.weight": "model-00049-of-00086.safetensors", "model.layers.71.self_attn.k_proj.weight_scale": "model-00049-of-00086.safetensors", "model.layers.71.self_attn.o_proj.input_scale": "model-00049-of-00086.safetensors", @@ -2211,6 +2402,7 @@ "model.layers.71.self_attn.q_proj.weight": "model-00049-of-00086.safetensors", "model.layers.71.self_attn.q_proj.weight_scale": "model-00049-of-00086.safetensors", "model.layers.71.self_attn.v_proj.input_scale": "model-00049-of-00086.safetensors", + "model.layers.71.self_attn.v_proj.v_scale": "model-00049-of-00086.safetensors", "model.layers.71.self_attn.v_proj.weight": "model-00049-of-00086.safetensors", "model.layers.71.self_attn.v_proj.weight_scale": "model-00049-of-00086.safetensors", "model.layers.72.input_layernorm.weight": "model-00050-of-00086.safetensors", @@ -2225,6 +2417,7 @@ "model.layers.72.mlp.up_proj.weight_scale": "model-00050-of-00086.safetensors", "model.layers.72.post_attention_layernorm.weight": "model-00050-of-00086.safetensors", "model.layers.72.self_attn.k_proj.input_scale": "model-00049-of-00086.safetensors", + "model.layers.72.self_attn.k_proj.k_scale": "model-00049-of-00086.safetensors", "model.layers.72.self_attn.k_proj.weight": "model-00049-of-00086.safetensors", "model.layers.72.self_attn.k_proj.weight_scale": "model-00049-of-00086.safetensors", "model.layers.72.self_attn.o_proj.input_scale": "model-00049-of-00086.safetensors", @@ -2234,6 +2427,7 @@ "model.layers.72.self_attn.q_proj.weight": "model-00049-of-00086.safetensors", "model.layers.72.self_attn.q_proj.weight_scale": "model-00049-of-00086.safetensors", "model.layers.72.self_attn.v_proj.input_scale": "model-00049-of-00086.safetensors", + "model.layers.72.self_attn.v_proj.v_scale": "model-00049-of-00086.safetensors", "model.layers.72.self_attn.v_proj.weight": "model-00049-of-00086.safetensors", "model.layers.72.self_attn.v_proj.weight_scale": "model-00049-of-00086.safetensors", "model.layers.73.input_layernorm.weight": "model-00051-of-00086.safetensors", @@ -2248,6 +2442,7 @@ "model.layers.73.mlp.up_proj.weight_scale": "model-00050-of-00086.safetensors", "model.layers.73.post_attention_layernorm.weight": "model-00051-of-00086.safetensors", "model.layers.73.self_attn.k_proj.input_scale": "model-00050-of-00086.safetensors", + "model.layers.73.self_attn.k_proj.k_scale": "model-00050-of-00086.safetensors", "model.layers.73.self_attn.k_proj.weight": "model-00050-of-00086.safetensors", "model.layers.73.self_attn.k_proj.weight_scale": "model-00050-of-00086.safetensors", "model.layers.73.self_attn.o_proj.input_scale": "model-00050-of-00086.safetensors", @@ -2257,6 +2452,7 @@ "model.layers.73.self_attn.q_proj.weight": "model-00050-of-00086.safetensors", "model.layers.73.self_attn.q_proj.weight_scale": "model-00050-of-00086.safetensors", "model.layers.73.self_attn.v_proj.input_scale": "model-00050-of-00086.safetensors", + "model.layers.73.self_attn.v_proj.v_scale": "model-00050-of-00086.safetensors", "model.layers.73.self_attn.v_proj.weight": "model-00050-of-00086.safetensors", "model.layers.73.self_attn.v_proj.weight_scale": "model-00050-of-00086.safetensors", "model.layers.74.input_layernorm.weight": "model-00051-of-00086.safetensors", @@ -2271,6 +2467,7 @@ "model.layers.74.mlp.up_proj.weight_scale": "model-00051-of-00086.safetensors", "model.layers.74.post_attention_layernorm.weight": "model-00051-of-00086.safetensors", "model.layers.74.self_attn.k_proj.input_scale": "model-00051-of-00086.safetensors", + "model.layers.74.self_attn.k_proj.k_scale": "model-00051-of-00086.safetensors", "model.layers.74.self_attn.k_proj.weight": "model-00051-of-00086.safetensors", "model.layers.74.self_attn.k_proj.weight_scale": "model-00051-of-00086.safetensors", "model.layers.74.self_attn.o_proj.input_scale": "model-00051-of-00086.safetensors", @@ -2280,6 +2477,7 @@ "model.layers.74.self_attn.q_proj.weight": "model-00051-of-00086.safetensors", "model.layers.74.self_attn.q_proj.weight_scale": "model-00051-of-00086.safetensors", "model.layers.74.self_attn.v_proj.input_scale": "model-00051-of-00086.safetensors", + "model.layers.74.self_attn.v_proj.v_scale": "model-00051-of-00086.safetensors", "model.layers.74.self_attn.v_proj.weight": "model-00051-of-00086.safetensors", "model.layers.74.self_attn.v_proj.weight_scale": "model-00051-of-00086.safetensors", "model.layers.75.input_layernorm.weight": "model-00052-of-00086.safetensors", @@ -2294,6 +2492,7 @@ "model.layers.75.mlp.up_proj.weight_scale": "model-00052-of-00086.safetensors", "model.layers.75.post_attention_layernorm.weight": "model-00052-of-00086.safetensors", "model.layers.75.self_attn.k_proj.input_scale": "model-00051-of-00086.safetensors", + "model.layers.75.self_attn.k_proj.k_scale": "model-00051-of-00086.safetensors", "model.layers.75.self_attn.k_proj.weight": "model-00051-of-00086.safetensors", "model.layers.75.self_attn.k_proj.weight_scale": "model-00051-of-00086.safetensors", "model.layers.75.self_attn.o_proj.input_scale": "model-00051-of-00086.safetensors", @@ -2303,6 +2502,7 @@ "model.layers.75.self_attn.q_proj.weight": "model-00051-of-00086.safetensors", "model.layers.75.self_attn.q_proj.weight_scale": "model-00051-of-00086.safetensors", "model.layers.75.self_attn.v_proj.input_scale": "model-00051-of-00086.safetensors", + "model.layers.75.self_attn.v_proj.v_scale": "model-00051-of-00086.safetensors", "model.layers.75.self_attn.v_proj.weight": "model-00051-of-00086.safetensors", "model.layers.75.self_attn.v_proj.weight_scale": "model-00051-of-00086.safetensors", "model.layers.76.input_layernorm.weight": "model-00053-of-00086.safetensors", @@ -2317,6 +2517,7 @@ "model.layers.76.mlp.up_proj.weight_scale": "model-00052-of-00086.safetensors", "model.layers.76.post_attention_layernorm.weight": "model-00053-of-00086.safetensors", "model.layers.76.self_attn.k_proj.input_scale": "model-00052-of-00086.safetensors", + "model.layers.76.self_attn.k_proj.k_scale": "model-00052-of-00086.safetensors", "model.layers.76.self_attn.k_proj.weight": "model-00052-of-00086.safetensors", "model.layers.76.self_attn.k_proj.weight_scale": "model-00052-of-00086.safetensors", "model.layers.76.self_attn.o_proj.input_scale": "model-00052-of-00086.safetensors", @@ -2326,6 +2527,7 @@ "model.layers.76.self_attn.q_proj.weight": "model-00052-of-00086.safetensors", "model.layers.76.self_attn.q_proj.weight_scale": "model-00052-of-00086.safetensors", "model.layers.76.self_attn.v_proj.input_scale": "model-00052-of-00086.safetensors", + "model.layers.76.self_attn.v_proj.v_scale": "model-00052-of-00086.safetensors", "model.layers.76.self_attn.v_proj.weight": "model-00052-of-00086.safetensors", "model.layers.76.self_attn.v_proj.weight_scale": "model-00052-of-00086.safetensors", "model.layers.77.input_layernorm.weight": "model-00053-of-00086.safetensors", @@ -2340,6 +2542,7 @@ "model.layers.77.mlp.up_proj.weight_scale": "model-00053-of-00086.safetensors", "model.layers.77.post_attention_layernorm.weight": "model-00053-of-00086.safetensors", "model.layers.77.self_attn.k_proj.input_scale": "model-00053-of-00086.safetensors", + "model.layers.77.self_attn.k_proj.k_scale": "model-00053-of-00086.safetensors", "model.layers.77.self_attn.k_proj.weight": "model-00053-of-00086.safetensors", "model.layers.77.self_attn.k_proj.weight_scale": "model-00053-of-00086.safetensors", "model.layers.77.self_attn.o_proj.input_scale": "model-00053-of-00086.safetensors", @@ -2349,6 +2552,7 @@ "model.layers.77.self_attn.q_proj.weight": "model-00053-of-00086.safetensors", "model.layers.77.self_attn.q_proj.weight_scale": "model-00053-of-00086.safetensors", "model.layers.77.self_attn.v_proj.input_scale": "model-00053-of-00086.safetensors", + "model.layers.77.self_attn.v_proj.v_scale": "model-00053-of-00086.safetensors", "model.layers.77.self_attn.v_proj.weight": "model-00053-of-00086.safetensors", "model.layers.77.self_attn.v_proj.weight_scale": "model-00053-of-00086.safetensors", "model.layers.78.input_layernorm.weight": "model-00054-of-00086.safetensors", @@ -2363,6 +2567,7 @@ "model.layers.78.mlp.up_proj.weight_scale": "model-00054-of-00086.safetensors", "model.layers.78.post_attention_layernorm.weight": "model-00054-of-00086.safetensors", "model.layers.78.self_attn.k_proj.input_scale": "model-00053-of-00086.safetensors", + "model.layers.78.self_attn.k_proj.k_scale": "model-00053-of-00086.safetensors", "model.layers.78.self_attn.k_proj.weight": "model-00053-of-00086.safetensors", "model.layers.78.self_attn.k_proj.weight_scale": "model-00053-of-00086.safetensors", "model.layers.78.self_attn.o_proj.input_scale": "model-00053-of-00086.safetensors", @@ -2372,6 +2577,7 @@ "model.layers.78.self_attn.q_proj.weight": "model-00053-of-00086.safetensors", "model.layers.78.self_attn.q_proj.weight_scale": "model-00053-of-00086.safetensors", "model.layers.78.self_attn.v_proj.input_scale": "model-00053-of-00086.safetensors", + "model.layers.78.self_attn.v_proj.v_scale": "model-00053-of-00086.safetensors", "model.layers.78.self_attn.v_proj.weight": "model-00053-of-00086.safetensors", "model.layers.78.self_attn.v_proj.weight_scale": "model-00053-of-00086.safetensors", "model.layers.79.input_layernorm.weight": "model-00055-of-00086.safetensors", @@ -2386,6 +2592,7 @@ "model.layers.79.mlp.up_proj.weight_scale": "model-00054-of-00086.safetensors", "model.layers.79.post_attention_layernorm.weight": "model-00055-of-00086.safetensors", "model.layers.79.self_attn.k_proj.input_scale": "model-00054-of-00086.safetensors", + "model.layers.79.self_attn.k_proj.k_scale": "model-00054-of-00086.safetensors", "model.layers.79.self_attn.k_proj.weight": "model-00054-of-00086.safetensors", "model.layers.79.self_attn.k_proj.weight_scale": "model-00054-of-00086.safetensors", "model.layers.79.self_attn.o_proj.input_scale": "model-00054-of-00086.safetensors", @@ -2395,6 +2602,7 @@ "model.layers.79.self_attn.q_proj.weight": "model-00054-of-00086.safetensors", "model.layers.79.self_attn.q_proj.weight_scale": "model-00054-of-00086.safetensors", "model.layers.79.self_attn.v_proj.input_scale": "model-00054-of-00086.safetensors", + "model.layers.79.self_attn.v_proj.v_scale": "model-00054-of-00086.safetensors", "model.layers.79.self_attn.v_proj.weight": "model-00054-of-00086.safetensors", "model.layers.79.self_attn.v_proj.weight_scale": "model-00054-of-00086.safetensors", "model.layers.8.input_layernorm.weight": "model-00007-of-00086.safetensors", @@ -2409,6 +2617,7 @@ "model.layers.8.mlp.up_proj.weight_scale": "model-00007-of-00086.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00007-of-00086.safetensors", "model.layers.8.self_attn.k_proj.input_scale": "model-00007-of-00086.safetensors", + "model.layers.8.self_attn.k_proj.k_scale": "model-00007-of-00086.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00007-of-00086.safetensors", "model.layers.8.self_attn.k_proj.weight_scale": "model-00007-of-00086.safetensors", "model.layers.8.self_attn.o_proj.input_scale": "model-00007-of-00086.safetensors", @@ -2418,6 +2627,7 @@ "model.layers.8.self_attn.q_proj.weight": "model-00007-of-00086.safetensors", "model.layers.8.self_attn.q_proj.weight_scale": "model-00007-of-00086.safetensors", "model.layers.8.self_attn.v_proj.input_scale": "model-00007-of-00086.safetensors", + "model.layers.8.self_attn.v_proj.v_scale": "model-00007-of-00086.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00007-of-00086.safetensors", "model.layers.8.self_attn.v_proj.weight_scale": "model-00007-of-00086.safetensors", "model.layers.80.input_layernorm.weight": "model-00055-of-00086.safetensors", @@ -2432,6 +2642,7 @@ "model.layers.80.mlp.up_proj.weight_scale": "model-00055-of-00086.safetensors", "model.layers.80.post_attention_layernorm.weight": "model-00055-of-00086.safetensors", "model.layers.80.self_attn.k_proj.input_scale": "model-00055-of-00086.safetensors", + "model.layers.80.self_attn.k_proj.k_scale": "model-00055-of-00086.safetensors", "model.layers.80.self_attn.k_proj.weight": "model-00055-of-00086.safetensors", "model.layers.80.self_attn.k_proj.weight_scale": "model-00055-of-00086.safetensors", "model.layers.80.self_attn.o_proj.input_scale": "model-00055-of-00086.safetensors", @@ -2441,6 +2652,7 @@ "model.layers.80.self_attn.q_proj.weight": "model-00055-of-00086.safetensors", "model.layers.80.self_attn.q_proj.weight_scale": "model-00055-of-00086.safetensors", "model.layers.80.self_attn.v_proj.input_scale": "model-00055-of-00086.safetensors", + "model.layers.80.self_attn.v_proj.v_scale": "model-00055-of-00086.safetensors", "model.layers.80.self_attn.v_proj.weight": "model-00055-of-00086.safetensors", "model.layers.80.self_attn.v_proj.weight_scale": "model-00055-of-00086.safetensors", "model.layers.81.input_layernorm.weight": "model-00056-of-00086.safetensors", @@ -2455,6 +2667,7 @@ "model.layers.81.mlp.up_proj.weight_scale": "model-00056-of-00086.safetensors", "model.layers.81.post_attention_layernorm.weight": "model-00056-of-00086.safetensors", "model.layers.81.self_attn.k_proj.input_scale": "model-00055-of-00086.safetensors", + "model.layers.81.self_attn.k_proj.k_scale": "model-00055-of-00086.safetensors", "model.layers.81.self_attn.k_proj.weight": "model-00055-of-00086.safetensors", "model.layers.81.self_attn.k_proj.weight_scale": "model-00055-of-00086.safetensors", "model.layers.81.self_attn.o_proj.input_scale": "model-00055-of-00086.safetensors", @@ -2464,6 +2677,7 @@ "model.layers.81.self_attn.q_proj.weight": "model-00055-of-00086.safetensors", "model.layers.81.self_attn.q_proj.weight_scale": "model-00055-of-00086.safetensors", "model.layers.81.self_attn.v_proj.input_scale": "model-00055-of-00086.safetensors", + "model.layers.81.self_attn.v_proj.v_scale": "model-00055-of-00086.safetensors", "model.layers.81.self_attn.v_proj.weight": "model-00055-of-00086.safetensors", "model.layers.81.self_attn.v_proj.weight_scale": "model-00055-of-00086.safetensors", "model.layers.82.input_layernorm.weight": "model-00057-of-00086.safetensors", @@ -2478,6 +2692,7 @@ "model.layers.82.mlp.up_proj.weight_scale": "model-00056-of-00086.safetensors", "model.layers.82.post_attention_layernorm.weight": "model-00057-of-00086.safetensors", "model.layers.82.self_attn.k_proj.input_scale": "model-00056-of-00086.safetensors", + "model.layers.82.self_attn.k_proj.k_scale": "model-00056-of-00086.safetensors", "model.layers.82.self_attn.k_proj.weight": "model-00056-of-00086.safetensors", "model.layers.82.self_attn.k_proj.weight_scale": "model-00056-of-00086.safetensors", "model.layers.82.self_attn.o_proj.input_scale": "model-00056-of-00086.safetensors", @@ -2487,6 +2702,7 @@ "model.layers.82.self_attn.q_proj.weight": "model-00056-of-00086.safetensors", "model.layers.82.self_attn.q_proj.weight_scale": "model-00056-of-00086.safetensors", "model.layers.82.self_attn.v_proj.input_scale": "model-00056-of-00086.safetensors", + "model.layers.82.self_attn.v_proj.v_scale": "model-00056-of-00086.safetensors", "model.layers.82.self_attn.v_proj.weight": "model-00056-of-00086.safetensors", "model.layers.82.self_attn.v_proj.weight_scale": "model-00056-of-00086.safetensors", "model.layers.83.input_layernorm.weight": "model-00057-of-00086.safetensors", @@ -2501,6 +2717,7 @@ "model.layers.83.mlp.up_proj.weight_scale": "model-00057-of-00086.safetensors", "model.layers.83.post_attention_layernorm.weight": "model-00057-of-00086.safetensors", "model.layers.83.self_attn.k_proj.input_scale": "model-00057-of-00086.safetensors", + "model.layers.83.self_attn.k_proj.k_scale": "model-00057-of-00086.safetensors", "model.layers.83.self_attn.k_proj.weight": "model-00057-of-00086.safetensors", "model.layers.83.self_attn.k_proj.weight_scale": "model-00057-of-00086.safetensors", "model.layers.83.self_attn.o_proj.input_scale": "model-00057-of-00086.safetensors", @@ -2510,6 +2727,7 @@ "model.layers.83.self_attn.q_proj.weight": "model-00057-of-00086.safetensors", "model.layers.83.self_attn.q_proj.weight_scale": "model-00057-of-00086.safetensors", "model.layers.83.self_attn.v_proj.input_scale": "model-00057-of-00086.safetensors", + "model.layers.83.self_attn.v_proj.v_scale": "model-00057-of-00086.safetensors", "model.layers.83.self_attn.v_proj.weight": "model-00057-of-00086.safetensors", "model.layers.83.self_attn.v_proj.weight_scale": "model-00057-of-00086.safetensors", "model.layers.84.input_layernorm.weight": "model-00058-of-00086.safetensors", @@ -2524,6 +2742,7 @@ "model.layers.84.mlp.up_proj.weight_scale": "model-00058-of-00086.safetensors", "model.layers.84.post_attention_layernorm.weight": "model-00058-of-00086.safetensors", "model.layers.84.self_attn.k_proj.input_scale": "model-00057-of-00086.safetensors", + "model.layers.84.self_attn.k_proj.k_scale": "model-00057-of-00086.safetensors", "model.layers.84.self_attn.k_proj.weight": "model-00057-of-00086.safetensors", "model.layers.84.self_attn.k_proj.weight_scale": "model-00057-of-00086.safetensors", "model.layers.84.self_attn.o_proj.input_scale": "model-00057-of-00086.safetensors", @@ -2533,6 +2752,7 @@ "model.layers.84.self_attn.q_proj.weight": "model-00057-of-00086.safetensors", "model.layers.84.self_attn.q_proj.weight_scale": "model-00057-of-00086.safetensors", "model.layers.84.self_attn.v_proj.input_scale": "model-00057-of-00086.safetensors", + "model.layers.84.self_attn.v_proj.v_scale": "model-00057-of-00086.safetensors", "model.layers.84.self_attn.v_proj.weight": "model-00057-of-00086.safetensors", "model.layers.84.self_attn.v_proj.weight_scale": "model-00057-of-00086.safetensors", "model.layers.85.input_layernorm.weight": "model-00059-of-00086.safetensors", @@ -2547,6 +2767,7 @@ "model.layers.85.mlp.up_proj.weight_scale": "model-00058-of-00086.safetensors", "model.layers.85.post_attention_layernorm.weight": "model-00059-of-00086.safetensors", "model.layers.85.self_attn.k_proj.input_scale": "model-00058-of-00086.safetensors", + "model.layers.85.self_attn.k_proj.k_scale": "model-00058-of-00086.safetensors", "model.layers.85.self_attn.k_proj.weight": "model-00058-of-00086.safetensors", "model.layers.85.self_attn.k_proj.weight_scale": "model-00058-of-00086.safetensors", "model.layers.85.self_attn.o_proj.input_scale": "model-00058-of-00086.safetensors", @@ -2556,6 +2777,7 @@ "model.layers.85.self_attn.q_proj.weight": "model-00058-of-00086.safetensors", "model.layers.85.self_attn.q_proj.weight_scale": "model-00058-of-00086.safetensors", "model.layers.85.self_attn.v_proj.input_scale": "model-00058-of-00086.safetensors", + "model.layers.85.self_attn.v_proj.v_scale": "model-00058-of-00086.safetensors", "model.layers.85.self_attn.v_proj.weight": "model-00058-of-00086.safetensors", "model.layers.85.self_attn.v_proj.weight_scale": "model-00058-of-00086.safetensors", "model.layers.86.input_layernorm.weight": "model-00059-of-00086.safetensors", @@ -2570,6 +2792,7 @@ "model.layers.86.mlp.up_proj.weight_scale": "model-00059-of-00086.safetensors", "model.layers.86.post_attention_layernorm.weight": "model-00059-of-00086.safetensors", "model.layers.86.self_attn.k_proj.input_scale": "model-00059-of-00086.safetensors", + "model.layers.86.self_attn.k_proj.k_scale": "model-00059-of-00086.safetensors", "model.layers.86.self_attn.k_proj.weight": "model-00059-of-00086.safetensors", "model.layers.86.self_attn.k_proj.weight_scale": "model-00059-of-00086.safetensors", "model.layers.86.self_attn.o_proj.input_scale": "model-00059-of-00086.safetensors", @@ -2579,6 +2802,7 @@ "model.layers.86.self_attn.q_proj.weight": "model-00059-of-00086.safetensors", "model.layers.86.self_attn.q_proj.weight_scale": "model-00059-of-00086.safetensors", "model.layers.86.self_attn.v_proj.input_scale": "model-00059-of-00086.safetensors", + "model.layers.86.self_attn.v_proj.v_scale": "model-00059-of-00086.safetensors", "model.layers.86.self_attn.v_proj.weight": "model-00059-of-00086.safetensors", "model.layers.86.self_attn.v_proj.weight_scale": "model-00059-of-00086.safetensors", "model.layers.87.input_layernorm.weight": "model-00060-of-00086.safetensors", @@ -2593,6 +2817,7 @@ "model.layers.87.mlp.up_proj.weight_scale": "model-00060-of-00086.safetensors", "model.layers.87.post_attention_layernorm.weight": "model-00060-of-00086.safetensors", "model.layers.87.self_attn.k_proj.input_scale": "model-00059-of-00086.safetensors", + "model.layers.87.self_attn.k_proj.k_scale": "model-00059-of-00086.safetensors", "model.layers.87.self_attn.k_proj.weight": "model-00059-of-00086.safetensors", "model.layers.87.self_attn.k_proj.weight_scale": "model-00059-of-00086.safetensors", "model.layers.87.self_attn.o_proj.input_scale": "model-00059-of-00086.safetensors", @@ -2602,6 +2827,7 @@ "model.layers.87.self_attn.q_proj.weight": "model-00059-of-00086.safetensors", "model.layers.87.self_attn.q_proj.weight_scale": "model-00059-of-00086.safetensors", "model.layers.87.self_attn.v_proj.input_scale": "model-00059-of-00086.safetensors", + "model.layers.87.self_attn.v_proj.v_scale": "model-00059-of-00086.safetensors", "model.layers.87.self_attn.v_proj.weight": "model-00059-of-00086.safetensors", "model.layers.87.self_attn.v_proj.weight_scale": "model-00059-of-00086.safetensors", "model.layers.88.input_layernorm.weight": "model-00061-of-00086.safetensors", @@ -2616,6 +2842,7 @@ "model.layers.88.mlp.up_proj.weight_scale": "model-00060-of-00086.safetensors", "model.layers.88.post_attention_layernorm.weight": "model-00061-of-00086.safetensors", "model.layers.88.self_attn.k_proj.input_scale": "model-00060-of-00086.safetensors", + "model.layers.88.self_attn.k_proj.k_scale": "model-00060-of-00086.safetensors", "model.layers.88.self_attn.k_proj.weight": "model-00060-of-00086.safetensors", "model.layers.88.self_attn.k_proj.weight_scale": "model-00060-of-00086.safetensors", "model.layers.88.self_attn.o_proj.input_scale": "model-00060-of-00086.safetensors", @@ -2625,6 +2852,7 @@ "model.layers.88.self_attn.q_proj.weight": "model-00060-of-00086.safetensors", "model.layers.88.self_attn.q_proj.weight_scale": "model-00060-of-00086.safetensors", "model.layers.88.self_attn.v_proj.input_scale": "model-00060-of-00086.safetensors", + "model.layers.88.self_attn.v_proj.v_scale": "model-00060-of-00086.safetensors", "model.layers.88.self_attn.v_proj.weight": "model-00060-of-00086.safetensors", "model.layers.88.self_attn.v_proj.weight_scale": "model-00060-of-00086.safetensors", "model.layers.89.input_layernorm.weight": "model-00061-of-00086.safetensors", @@ -2639,6 +2867,7 @@ "model.layers.89.mlp.up_proj.weight_scale": "model-00061-of-00086.safetensors", "model.layers.89.post_attention_layernorm.weight": "model-00061-of-00086.safetensors", "model.layers.89.self_attn.k_proj.input_scale": "model-00061-of-00086.safetensors", + "model.layers.89.self_attn.k_proj.k_scale": "model-00061-of-00086.safetensors", "model.layers.89.self_attn.k_proj.weight": "model-00061-of-00086.safetensors", "model.layers.89.self_attn.k_proj.weight_scale": "model-00061-of-00086.safetensors", "model.layers.89.self_attn.o_proj.input_scale": "model-00061-of-00086.safetensors", @@ -2648,6 +2877,7 @@ "model.layers.89.self_attn.q_proj.weight": "model-00061-of-00086.safetensors", "model.layers.89.self_attn.q_proj.weight_scale": "model-00061-of-00086.safetensors", "model.layers.89.self_attn.v_proj.input_scale": "model-00061-of-00086.safetensors", + "model.layers.89.self_attn.v_proj.v_scale": "model-00061-of-00086.safetensors", "model.layers.89.self_attn.v_proj.weight": "model-00061-of-00086.safetensors", "model.layers.89.self_attn.v_proj.weight_scale": "model-00061-of-00086.safetensors", "model.layers.9.input_layernorm.weight": "model-00008-of-00086.safetensors", @@ -2662,6 +2892,7 @@ "model.layers.9.mlp.up_proj.weight_scale": "model-00008-of-00086.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00008-of-00086.safetensors", "model.layers.9.self_attn.k_proj.input_scale": "model-00007-of-00086.safetensors", + "model.layers.9.self_attn.k_proj.k_scale": "model-00007-of-00086.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00007-of-00086.safetensors", "model.layers.9.self_attn.k_proj.weight_scale": "model-00007-of-00086.safetensors", "model.layers.9.self_attn.o_proj.input_scale": "model-00007-of-00086.safetensors", @@ -2671,6 +2902,7 @@ "model.layers.9.self_attn.q_proj.weight": "model-00007-of-00086.safetensors", "model.layers.9.self_attn.q_proj.weight_scale": "model-00007-of-00086.safetensors", "model.layers.9.self_attn.v_proj.input_scale": "model-00007-of-00086.safetensors", + "model.layers.9.self_attn.v_proj.v_scale": "model-00007-of-00086.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00007-of-00086.safetensors", "model.layers.9.self_attn.v_proj.weight_scale": "model-00007-of-00086.safetensors", "model.layers.90.input_layernorm.weight": "model-00062-of-00086.safetensors", @@ -2685,6 +2917,7 @@ "model.layers.90.mlp.up_proj.weight_scale": "model-00062-of-00086.safetensors", "model.layers.90.post_attention_layernorm.weight": "model-00062-of-00086.safetensors", "model.layers.90.self_attn.k_proj.input_scale": "model-00061-of-00086.safetensors", + "model.layers.90.self_attn.k_proj.k_scale": "model-00061-of-00086.safetensors", "model.layers.90.self_attn.k_proj.weight": "model-00061-of-00086.safetensors", "model.layers.90.self_attn.k_proj.weight_scale": "model-00061-of-00086.safetensors", "model.layers.90.self_attn.o_proj.input_scale": "model-00061-of-00086.safetensors", @@ -2694,6 +2927,7 @@ "model.layers.90.self_attn.q_proj.weight": "model-00061-of-00086.safetensors", "model.layers.90.self_attn.q_proj.weight_scale": "model-00061-of-00086.safetensors", "model.layers.90.self_attn.v_proj.input_scale": "model-00061-of-00086.safetensors", + "model.layers.90.self_attn.v_proj.v_scale": "model-00061-of-00086.safetensors", "model.layers.90.self_attn.v_proj.weight": "model-00061-of-00086.safetensors", "model.layers.90.self_attn.v_proj.weight_scale": "model-00061-of-00086.safetensors", "model.layers.91.input_layernorm.weight": "model-00063-of-00086.safetensors", @@ -2708,6 +2942,7 @@ "model.layers.91.mlp.up_proj.weight_scale": "model-00062-of-00086.safetensors", "model.layers.91.post_attention_layernorm.weight": "model-00063-of-00086.safetensors", "model.layers.91.self_attn.k_proj.input_scale": "model-00062-of-00086.safetensors", + "model.layers.91.self_attn.k_proj.k_scale": "model-00062-of-00086.safetensors", "model.layers.91.self_attn.k_proj.weight": "model-00062-of-00086.safetensors", "model.layers.91.self_attn.k_proj.weight_scale": "model-00062-of-00086.safetensors", "model.layers.91.self_attn.o_proj.input_scale": "model-00062-of-00086.safetensors", @@ -2717,6 +2952,7 @@ "model.layers.91.self_attn.q_proj.weight": "model-00062-of-00086.safetensors", "model.layers.91.self_attn.q_proj.weight_scale": "model-00062-of-00086.safetensors", "model.layers.91.self_attn.v_proj.input_scale": "model-00062-of-00086.safetensors", + "model.layers.91.self_attn.v_proj.v_scale": "model-00062-of-00086.safetensors", "model.layers.91.self_attn.v_proj.weight": "model-00062-of-00086.safetensors", "model.layers.91.self_attn.v_proj.weight_scale": "model-00062-of-00086.safetensors", "model.layers.92.input_layernorm.weight": "model-00063-of-00086.safetensors", @@ -2731,6 +2967,7 @@ "model.layers.92.mlp.up_proj.weight_scale": "model-00063-of-00086.safetensors", "model.layers.92.post_attention_layernorm.weight": "model-00063-of-00086.safetensors", "model.layers.92.self_attn.k_proj.input_scale": "model-00063-of-00086.safetensors", + "model.layers.92.self_attn.k_proj.k_scale": "model-00063-of-00086.safetensors", "model.layers.92.self_attn.k_proj.weight": "model-00063-of-00086.safetensors", "model.layers.92.self_attn.k_proj.weight_scale": "model-00063-of-00086.safetensors", "model.layers.92.self_attn.o_proj.input_scale": "model-00063-of-00086.safetensors", @@ -2740,6 +2977,7 @@ "model.layers.92.self_attn.q_proj.weight": "model-00063-of-00086.safetensors", "model.layers.92.self_attn.q_proj.weight_scale": "model-00063-of-00086.safetensors", "model.layers.92.self_attn.v_proj.input_scale": "model-00063-of-00086.safetensors", + "model.layers.92.self_attn.v_proj.v_scale": "model-00063-of-00086.safetensors", "model.layers.92.self_attn.v_proj.weight": "model-00063-of-00086.safetensors", "model.layers.92.self_attn.v_proj.weight_scale": "model-00063-of-00086.safetensors", "model.layers.93.input_layernorm.weight": "model-00064-of-00086.safetensors", @@ -2754,6 +2992,7 @@ "model.layers.93.mlp.up_proj.weight_scale": "model-00064-of-00086.safetensors", "model.layers.93.post_attention_layernorm.weight": "model-00064-of-00086.safetensors", "model.layers.93.self_attn.k_proj.input_scale": "model-00063-of-00086.safetensors", + "model.layers.93.self_attn.k_proj.k_scale": "model-00063-of-00086.safetensors", "model.layers.93.self_attn.k_proj.weight": "model-00063-of-00086.safetensors", "model.layers.93.self_attn.k_proj.weight_scale": "model-00063-of-00086.safetensors", "model.layers.93.self_attn.o_proj.input_scale": "model-00063-of-00086.safetensors", @@ -2763,6 +3002,7 @@ "model.layers.93.self_attn.q_proj.weight": "model-00063-of-00086.safetensors", "model.layers.93.self_attn.q_proj.weight_scale": "model-00063-of-00086.safetensors", "model.layers.93.self_attn.v_proj.input_scale": "model-00063-of-00086.safetensors", + "model.layers.93.self_attn.v_proj.v_scale": "model-00063-of-00086.safetensors", "model.layers.93.self_attn.v_proj.weight": "model-00063-of-00086.safetensors", "model.layers.93.self_attn.v_proj.weight_scale": "model-00063-of-00086.safetensors", "model.layers.94.input_layernorm.weight": "model-00065-of-00086.safetensors", @@ -2777,6 +3017,7 @@ "model.layers.94.mlp.up_proj.weight_scale": "model-00064-of-00086.safetensors", "model.layers.94.post_attention_layernorm.weight": "model-00065-of-00086.safetensors", "model.layers.94.self_attn.k_proj.input_scale": "model-00064-of-00086.safetensors", + "model.layers.94.self_attn.k_proj.k_scale": "model-00064-of-00086.safetensors", "model.layers.94.self_attn.k_proj.weight": "model-00064-of-00086.safetensors", "model.layers.94.self_attn.k_proj.weight_scale": "model-00064-of-00086.safetensors", "model.layers.94.self_attn.o_proj.input_scale": "model-00064-of-00086.safetensors", @@ -2786,6 +3027,7 @@ "model.layers.94.self_attn.q_proj.weight": "model-00064-of-00086.safetensors", "model.layers.94.self_attn.q_proj.weight_scale": "model-00064-of-00086.safetensors", "model.layers.94.self_attn.v_proj.input_scale": "model-00064-of-00086.safetensors", + "model.layers.94.self_attn.v_proj.v_scale": "model-00064-of-00086.safetensors", "model.layers.94.self_attn.v_proj.weight": "model-00064-of-00086.safetensors", "model.layers.94.self_attn.v_proj.weight_scale": "model-00064-of-00086.safetensors", "model.layers.95.input_layernorm.weight": "model-00065-of-00086.safetensors", @@ -2800,6 +3042,7 @@ "model.layers.95.mlp.up_proj.weight_scale": "model-00065-of-00086.safetensors", "model.layers.95.post_attention_layernorm.weight": "model-00065-of-00086.safetensors", "model.layers.95.self_attn.k_proj.input_scale": "model-00065-of-00086.safetensors", + "model.layers.95.self_attn.k_proj.k_scale": "model-00065-of-00086.safetensors", "model.layers.95.self_attn.k_proj.weight": "model-00065-of-00086.safetensors", "model.layers.95.self_attn.k_proj.weight_scale": "model-00065-of-00086.safetensors", "model.layers.95.self_attn.o_proj.input_scale": "model-00065-of-00086.safetensors", @@ -2809,6 +3052,7 @@ "model.layers.95.self_attn.q_proj.weight": "model-00065-of-00086.safetensors", "model.layers.95.self_attn.q_proj.weight_scale": "model-00065-of-00086.safetensors", "model.layers.95.self_attn.v_proj.input_scale": "model-00065-of-00086.safetensors", + "model.layers.95.self_attn.v_proj.v_scale": "model-00065-of-00086.safetensors", "model.layers.95.self_attn.v_proj.weight": "model-00065-of-00086.safetensors", "model.layers.95.self_attn.v_proj.weight_scale": "model-00065-of-00086.safetensors", "model.layers.96.input_layernorm.weight": "model-00066-of-00086.safetensors", @@ -2823,6 +3067,7 @@ "model.layers.96.mlp.up_proj.weight_scale": "model-00066-of-00086.safetensors", "model.layers.96.post_attention_layernorm.weight": "model-00066-of-00086.safetensors", "model.layers.96.self_attn.k_proj.input_scale": "model-00065-of-00086.safetensors", + "model.layers.96.self_attn.k_proj.k_scale": "model-00065-of-00086.safetensors", "model.layers.96.self_attn.k_proj.weight": "model-00065-of-00086.safetensors", "model.layers.96.self_attn.k_proj.weight_scale": "model-00065-of-00086.safetensors", "model.layers.96.self_attn.o_proj.input_scale": "model-00065-of-00086.safetensors", @@ -2832,6 +3077,7 @@ "model.layers.96.self_attn.q_proj.weight": "model-00065-of-00086.safetensors", "model.layers.96.self_attn.q_proj.weight_scale": "model-00065-of-00086.safetensors", "model.layers.96.self_attn.v_proj.input_scale": "model-00065-of-00086.safetensors", + "model.layers.96.self_attn.v_proj.v_scale": "model-00065-of-00086.safetensors", "model.layers.96.self_attn.v_proj.weight": "model-00065-of-00086.safetensors", "model.layers.96.self_attn.v_proj.weight_scale": "model-00065-of-00086.safetensors", "model.layers.97.input_layernorm.weight": "model-00067-of-00086.safetensors", @@ -2846,6 +3092,7 @@ "model.layers.97.mlp.up_proj.weight_scale": "model-00066-of-00086.safetensors", "model.layers.97.post_attention_layernorm.weight": "model-00067-of-00086.safetensors", "model.layers.97.self_attn.k_proj.input_scale": "model-00066-of-00086.safetensors", + "model.layers.97.self_attn.k_proj.k_scale": "model-00066-of-00086.safetensors", "model.layers.97.self_attn.k_proj.weight": "model-00066-of-00086.safetensors", "model.layers.97.self_attn.k_proj.weight_scale": "model-00066-of-00086.safetensors", "model.layers.97.self_attn.o_proj.input_scale": "model-00066-of-00086.safetensors", @@ -2855,6 +3102,7 @@ "model.layers.97.self_attn.q_proj.weight": "model-00066-of-00086.safetensors", "model.layers.97.self_attn.q_proj.weight_scale": "model-00066-of-00086.safetensors", "model.layers.97.self_attn.v_proj.input_scale": "model-00066-of-00086.safetensors", + "model.layers.97.self_attn.v_proj.v_scale": "model-00066-of-00086.safetensors", "model.layers.97.self_attn.v_proj.weight": "model-00066-of-00086.safetensors", "model.layers.97.self_attn.v_proj.weight_scale": "model-00066-of-00086.safetensors", "model.layers.98.input_layernorm.weight": "model-00067-of-00086.safetensors", @@ -2869,6 +3117,7 @@ "model.layers.98.mlp.up_proj.weight_scale": "model-00067-of-00086.safetensors", "model.layers.98.post_attention_layernorm.weight": "model-00067-of-00086.safetensors", "model.layers.98.self_attn.k_proj.input_scale": "model-00067-of-00086.safetensors", + "model.layers.98.self_attn.k_proj.k_scale": "model-00067-of-00086.safetensors", "model.layers.98.self_attn.k_proj.weight": "model-00067-of-00086.safetensors", "model.layers.98.self_attn.k_proj.weight_scale": "model-00067-of-00086.safetensors", "model.layers.98.self_attn.o_proj.input_scale": "model-00067-of-00086.safetensors", @@ -2878,6 +3127,7 @@ "model.layers.98.self_attn.q_proj.weight": "model-00067-of-00086.safetensors", "model.layers.98.self_attn.q_proj.weight_scale": "model-00067-of-00086.safetensors", "model.layers.98.self_attn.v_proj.input_scale": "model-00067-of-00086.safetensors", + "model.layers.98.self_attn.v_proj.v_scale": "model-00067-of-00086.safetensors", "model.layers.98.self_attn.v_proj.weight": "model-00067-of-00086.safetensors", "model.layers.98.self_attn.v_proj.weight_scale": "model-00067-of-00086.safetensors", "model.layers.99.input_layernorm.weight": "model-00068-of-00086.safetensors", @@ -2892,6 +3142,7 @@ "model.layers.99.mlp.up_proj.weight_scale": "model-00068-of-00086.safetensors", "model.layers.99.post_attention_layernorm.weight": "model-00068-of-00086.safetensors", "model.layers.99.self_attn.k_proj.input_scale": "model-00067-of-00086.safetensors", + "model.layers.99.self_attn.k_proj.k_scale": "model-00067-of-00086.safetensors", "model.layers.99.self_attn.k_proj.weight": "model-00067-of-00086.safetensors", "model.layers.99.self_attn.k_proj.weight_scale": "model-00067-of-00086.safetensors", "model.layers.99.self_attn.o_proj.input_scale": "model-00067-of-00086.safetensors", @@ -2901,6 +3152,7 @@ "model.layers.99.self_attn.q_proj.weight": "model-00067-of-00086.safetensors", "model.layers.99.self_attn.q_proj.weight_scale": "model-00067-of-00086.safetensors", "model.layers.99.self_attn.v_proj.input_scale": "model-00067-of-00086.safetensors", + "model.layers.99.self_attn.v_proj.v_scale": "model-00067-of-00086.safetensors", "model.layers.99.self_attn.v_proj.weight": "model-00067-of-00086.safetensors", "model.layers.99.self_attn.v_proj.weight_scale": "model-00067-of-00086.safetensors", "model.norm.weight": "model-00085-of-00086.safetensors" diff --git a/special_tokens_map.json b/special_tokens_map.json old mode 100644 new mode 100755 index 04829afa78a2d2df203ac846968db37269b01f7f..b43be96621d147110fb8a18b5776ec6e38516127 --- a/special_tokens_map.json +++ b/special_tokens_map.json @@ -7,11 +7,11 @@ "single_word": false }, "eos_token": { - "content": "<|end_of_text|>", + "content": "<|eot_id|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }, - "pad_token": "<|end_of_text|>" + "pad_token": "<|eot_id|>" } diff --git a/tokenizer.json b/tokenizer.json old mode 100644 new mode 100755 index 2bac1a86a87dd66d9e953215a4c76f7151c65994..ca315983b2dfda86c55842792bec2b6d5c1bc7cf --- a/tokenizer.json +++ b/tokenizer.json @@ -10,9 +10,9 @@ "strategy": "BatchLongest", "direction": "Left", "pad_to_multiple_of": null, - "pad_id": 128001, + "pad_id": 128009, "pad_type_id": 0, - "pad_token": "<|end_of_text|>" + "pad_token": "<|eot_id|>" }, "added_tokens": [ { diff --git a/tokenizer_config.json b/tokenizer_config.json old mode 100644 new mode 100755 index db88166e2bc4c799fd5d1ae643b75e84d03ee70e..b6e4e4df0ecc9c7f724a657ad557fae021e2b830 --- a/tokenizer_config.json +++ b/tokenizer_config.json @@ -2058,5 +2058,6 @@ "attention_mask" ], "model_max_length": 131072, + "pad_token": "<|eot_id|>", "tokenizer_class": "PreTrainedTokenizerFast" }