philschmid ArtifactAI committed on Jan 27, 2023

Commit

4b5a3ab

0 Parent(s):

Duplicate from Artifact-AI/flan-t5-xxl-sharded-fp16

Browse files

Co-authored-by: ArtifactAI <[email protected]>

Files changed (22) hide show

.gitattributes +34 -0
README.md +3 -0
config.json +31 -0
handler.py +33 -0
pytorch_model-00001-of-00012.bin +3 -0
pytorch_model-00002-of-00012.bin +3 -0
pytorch_model-00003-of-00012.bin +3 -0
pytorch_model-00004-of-00012.bin +3 -0
pytorch_model-00005-of-00012.bin +3 -0
pytorch_model-00006-of-00012.bin +3 -0
pytorch_model-00007-of-00012.bin +3 -0
pytorch_model-00008-of-00012.bin +3 -0
pytorch_model-00009-of-00012.bin +3 -0
pytorch_model-00010-of-00012.bin +3 -0
pytorch_model-00011-of-00012.bin +3 -0
pytorch_model-00012-of-00012.bin +3 -0
pytorch_model.bin.index.json +567 -0
requirements.txt +2 -0
special_tokens_map.json +107 -0
spiece.model +3 -0
tokenizer.json +0 -0
tokenizer_config.json +113 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,3 @@

+---
+duplicated_from: Artifact-AI/flan-t5-xxl-sharded-fp16
+---

config.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "_name_or_path": "google/flan-t5-xxl",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "d_ff": 10240,
+  "d_kv": 64,
+  "d_model": 4096,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "num_decoder_layers": 24,
+  "num_heads": 64,
+  "num_layers": 24,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.25.0.dev0",
+  "use_cache": true,
+  "vocab_size": 32128
+}

handler.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from typing import Dict, List, Any
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+import torch
+class EndpointHandler:
+    def __init__(self, path=""):
+        # load model and processor from path
+        self.model =  AutoModelForSeq2SeqLM.from_pretrained(path, device_map="auto", load_in_8bit=True)
+        self.tokenizer = AutoTokenizer.from_pretrained(path)
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
+        """
+        Args:
+            data (:obj:):
+                includes the deserialized image file as PIL.Image
+        """
+        # process input
+        inputs = data.pop("inputs", data)
+        parameters = data.pop("parameters", None)
+        # preprocess
+        input_ids = self.tokenizer(inputs, return_tensors="pt").input_ids
+        # pass inputs with all kwargs in data
+        if parameters is not None:
+            outputs = self.model.generate(input_ids, **parameters)
+        else:
+            outputs = self.model.generate(input_ids)
+        # postprocess the prediction
+        prediction = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return [{"generated_text": prediction}]

pytorch_model-00001-of-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e25f5c8cfa76b741c571af80f08b56ac46d7aae4bcf1e55b4b359d12b0b982a
+size 1722882745

pytorch_model-00002-of-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:23a9f14cd291f32dc632ae54f00d1a6f7ebbbf30f684f5514a8ba940edb31090
+size 1929475486

pytorch_model-00003-of-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:54f96c4fbb5b528f2ddc33107c35f06c3d6f08243998a97dc734ba2b9b957de7
+size 1929475550

pytorch_model-00004-of-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8aa543869df8ad0a36ce2d2e0c0431f5b6c125c4b24b85b143647d6538869b8e
+size 1929475550

pytorch_model-00005-of-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c4593d667851e4352aa3e1000b6d1eb0dc3afc8822ba0917e75aee621f7fda0
+size 1929475550

pytorch_model-00006-of-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e57baaf6618267c41a09a1f3ebb67527318371b4b0c00c2c0b03170ee1bfc71d
+size 1974577874

pytorch_model-00007-of-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1e35ae8cc9db61cf8abfe2d2d8bcfd33e0d885b4d29ab5032001273252e1800
+size 1929485961

pytorch_model-00008-of-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0b0b31563b92b809080bd070ee4ab3512e8a713519f547ccefd3853cff69d3a3
+size 1996604032

pytorch_model-00009-of-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb40124c9985352cfbde18cf84059206d6cc77c873daf8593712e5570b0af32a
+size 1996604032

pytorch_model-00010-of-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:38e8581dd6612693a70c5053eb761900518275b3cead15f775f124799fe5def4
+size 1979817673

pytorch_model-00011-of-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:082657b7b9ceacdaf7801650fe6449a813fcb1a78860137cb6dd573c73dc2839
+size 1979817673

pytorch_model-00012-of-00012.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b1343f7f89d0d16119f9d0014d9694c094bfe7848dea8ea419bbc0338c33d73
+size 1236336721

pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,567 @@

+{
+  "metadata": {
+    "total_size": 22797049856
+  },
+  "weight_map": {
+    "decoder.block.0.layer.0.SelfAttention.k.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.0.SelfAttention.o.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.0.SelfAttention.q.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.0.SelfAttention.v.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.0.layer_norm.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.1.EncDecAttention.k.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.1.EncDecAttention.o.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.1.EncDecAttention.q.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.1.EncDecAttention.v.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.1.layer_norm.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.2.DenseReluDense.wo.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.0.layer.2.layer_norm.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.0.SelfAttention.k.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.0.SelfAttention.o.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.0.SelfAttention.q.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.0.SelfAttention.v.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.0.layer_norm.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.1.EncDecAttention.k.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.1.EncDecAttention.o.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.1.EncDecAttention.q.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.1.EncDecAttention.v.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.1.layer_norm.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.2.DenseReluDense.wo.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.1.layer.2.layer_norm.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.10.layer.0.SelfAttention.k.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.10.layer.0.SelfAttention.o.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.10.layer.0.SelfAttention.q.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.10.layer.0.SelfAttention.v.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.10.layer.0.layer_norm.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.10.layer.1.EncDecAttention.k.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.10.layer.1.EncDecAttention.o.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.10.layer.1.EncDecAttention.q.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.10.layer.1.EncDecAttention.v.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.10.layer.1.layer_norm.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.10.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.10.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.10.layer.2.DenseReluDense.wo.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.10.layer.2.layer_norm.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.0.SelfAttention.k.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.0.SelfAttention.o.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.0.SelfAttention.q.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.0.SelfAttention.v.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.0.layer_norm.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.1.EncDecAttention.k.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.1.EncDecAttention.o.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.1.EncDecAttention.q.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.1.EncDecAttention.v.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.1.layer_norm.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.2.DenseReluDense.wo.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.11.layer.2.layer_norm.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.0.SelfAttention.k.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.0.SelfAttention.o.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.0.SelfAttention.q.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.0.SelfAttention.v.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.0.layer_norm.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.1.EncDecAttention.k.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.1.EncDecAttention.o.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.1.EncDecAttention.q.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.1.EncDecAttention.v.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.1.layer_norm.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.2.DenseReluDense.wo.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.12.layer.2.layer_norm.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.0.SelfAttention.k.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.0.SelfAttention.o.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.0.SelfAttention.q.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.0.SelfAttention.v.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.0.layer_norm.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.1.EncDecAttention.k.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.1.EncDecAttention.o.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.1.EncDecAttention.q.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.1.EncDecAttention.v.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.1.layer_norm.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.2.DenseReluDense.wo.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.13.layer.2.layer_norm.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.14.layer.0.SelfAttention.k.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.14.layer.0.SelfAttention.o.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.14.layer.0.SelfAttention.q.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.14.layer.0.SelfAttention.v.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.14.layer.0.layer_norm.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.14.layer.1.EncDecAttention.k.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.14.layer.1.EncDecAttention.o.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.14.layer.1.EncDecAttention.q.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.14.layer.1.EncDecAttention.v.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.14.layer.1.layer_norm.weight": "pytorch_model-00009-of-00012.bin",
+    "decoder.block.14.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.14.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.14.layer.2.DenseReluDense.wo.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.14.layer.2.layer_norm.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.0.SelfAttention.k.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.0.SelfAttention.o.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.0.SelfAttention.q.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.0.SelfAttention.v.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.0.layer_norm.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.1.EncDecAttention.k.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.1.EncDecAttention.o.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.1.EncDecAttention.q.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.1.EncDecAttention.v.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.1.layer_norm.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.2.DenseReluDense.wo.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.15.layer.2.layer_norm.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.0.SelfAttention.k.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.0.SelfAttention.o.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.0.SelfAttention.q.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.0.SelfAttention.v.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.0.layer_norm.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.1.EncDecAttention.k.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.1.EncDecAttention.o.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.1.EncDecAttention.q.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.1.EncDecAttention.v.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.1.layer_norm.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.2.DenseReluDense.wo.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.16.layer.2.layer_norm.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.0.SelfAttention.k.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.0.SelfAttention.o.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.0.SelfAttention.q.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.0.SelfAttention.v.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.0.layer_norm.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.1.EncDecAttention.k.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.1.EncDecAttention.o.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.1.EncDecAttention.q.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.1.EncDecAttention.v.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.1.layer_norm.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.2.DenseReluDense.wo.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.17.layer.2.layer_norm.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.18.layer.0.SelfAttention.k.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.18.layer.0.SelfAttention.o.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.18.layer.0.SelfAttention.q.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.18.layer.0.SelfAttention.v.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.18.layer.0.layer_norm.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.18.layer.1.EncDecAttention.k.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.18.layer.1.EncDecAttention.o.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.18.layer.1.EncDecAttention.q.weight": "pytorch_model-00010-of-00012.bin",
+    "decoder.block.18.layer.1.EncDecAttention.v.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.18.layer.1.layer_norm.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.18.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.18.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.18.layer.2.DenseReluDense.wo.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.18.layer.2.layer_norm.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.0.SelfAttention.k.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.0.SelfAttention.o.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.0.SelfAttention.q.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.0.SelfAttention.v.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.0.layer_norm.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.1.EncDecAttention.k.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.1.EncDecAttention.o.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.1.EncDecAttention.q.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.1.EncDecAttention.v.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.1.layer_norm.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.2.DenseReluDense.wo.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.19.layer.2.layer_norm.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.2.layer.0.SelfAttention.k.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.0.SelfAttention.o.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.0.SelfAttention.q.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.0.SelfAttention.v.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.0.layer_norm.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.1.EncDecAttention.k.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.1.EncDecAttention.o.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.1.EncDecAttention.q.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.1.EncDecAttention.v.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.1.layer_norm.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.2.DenseReluDense.wo.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.2.layer.2.layer_norm.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.20.layer.0.SelfAttention.k.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.0.SelfAttention.o.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.0.SelfAttention.q.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.0.SelfAttention.v.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.0.layer_norm.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.1.EncDecAttention.k.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.1.EncDecAttention.o.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.1.EncDecAttention.q.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.1.EncDecAttention.v.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.1.layer_norm.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.2.DenseReluDense.wo.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.20.layer.2.layer_norm.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.0.SelfAttention.k.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.0.SelfAttention.o.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.0.SelfAttention.q.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.0.SelfAttention.v.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.0.layer_norm.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.1.EncDecAttention.k.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.1.EncDecAttention.o.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.1.EncDecAttention.q.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.1.EncDecAttention.v.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.1.layer_norm.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.2.DenseReluDense.wo.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.21.layer.2.layer_norm.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.22.layer.0.SelfAttention.k.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.22.layer.0.SelfAttention.o.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.22.layer.0.SelfAttention.q.weight": "pytorch_model-00011-of-00012.bin",
+    "decoder.block.22.layer.0.SelfAttention.v.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.22.layer.0.layer_norm.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.22.layer.1.EncDecAttention.k.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.22.layer.1.EncDecAttention.o.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.22.layer.1.EncDecAttention.q.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.22.layer.1.EncDecAttention.v.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.22.layer.1.layer_norm.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.22.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.22.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.22.layer.2.DenseReluDense.wo.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.22.layer.2.layer_norm.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.0.SelfAttention.k.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.0.SelfAttention.o.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.0.SelfAttention.q.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.0.SelfAttention.v.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.0.layer_norm.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.1.EncDecAttention.k.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.1.EncDecAttention.o.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.1.EncDecAttention.q.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.1.EncDecAttention.v.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.1.layer_norm.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.2.DenseReluDense.wo.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.23.layer.2.layer_norm.weight": "pytorch_model-00012-of-00012.bin",
+    "decoder.block.3.layer.0.SelfAttention.k.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.3.layer.0.SelfAttention.o.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.3.layer.0.SelfAttention.q.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.block.3.layer.0.SelfAttention.v.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.3.layer.0.layer_norm.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.3.layer.1.EncDecAttention.k.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.3.layer.1.EncDecAttention.o.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.3.layer.1.EncDecAttention.q.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.3.layer.1.EncDecAttention.v.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.3.layer.1.layer_norm.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.3.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.3.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.3.layer.2.DenseReluDense.wo.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.3.layer.2.layer_norm.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.0.SelfAttention.k.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.0.SelfAttention.o.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.0.SelfAttention.q.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.0.SelfAttention.v.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.0.layer_norm.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.1.EncDecAttention.k.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.1.EncDecAttention.o.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.1.EncDecAttention.q.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.1.EncDecAttention.v.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.1.layer_norm.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.2.DenseReluDense.wo.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.4.layer.2.layer_norm.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.0.SelfAttention.k.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.0.SelfAttention.o.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.0.SelfAttention.q.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.0.SelfAttention.v.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.0.layer_norm.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.1.EncDecAttention.k.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.1.EncDecAttention.o.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.1.EncDecAttention.q.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.1.EncDecAttention.v.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.1.layer_norm.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.2.DenseReluDense.wo.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.5.layer.2.layer_norm.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.0.SelfAttention.k.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.0.SelfAttention.o.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.0.SelfAttention.q.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.0.SelfAttention.v.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.0.layer_norm.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.1.EncDecAttention.k.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.1.EncDecAttention.o.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.1.EncDecAttention.q.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.1.EncDecAttention.v.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.1.layer_norm.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00007-of-00012.bin",
+    "decoder.block.6.layer.2.DenseReluDense.wo.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.6.layer.2.layer_norm.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.0.SelfAttention.k.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.0.SelfAttention.o.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.0.SelfAttention.q.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.0.SelfAttention.v.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.0.layer_norm.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.1.EncDecAttention.k.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.1.EncDecAttention.o.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.1.EncDecAttention.q.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.1.EncDecAttention.v.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.1.layer_norm.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.2.DenseReluDense.wo.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.7.layer.2.layer_norm.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.0.SelfAttention.k.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.0.SelfAttention.o.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.0.SelfAttention.q.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.0.SelfAttention.v.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.0.layer_norm.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.1.EncDecAttention.k.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.1.EncDecAttention.o.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.1.EncDecAttention.q.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.1.EncDecAttention.v.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.1.layer_norm.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.2.DenseReluDense.wo.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.8.layer.2.layer_norm.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.0.SelfAttention.k.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.0.SelfAttention.o.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.0.SelfAttention.q.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.0.SelfAttention.v.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.0.layer_norm.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.1.EncDecAttention.k.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.1.EncDecAttention.o.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.1.EncDecAttention.q.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.1.EncDecAttention.v.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.1.layer_norm.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.2.DenseReluDense.wo.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.block.9.layer.2.layer_norm.weight": "pytorch_model-00008-of-00012.bin",
+    "decoder.embed_tokens.weight": "pytorch_model-00006-of-00012.bin",
+    "decoder.final_layer_norm.weight": "pytorch_model-00012-of-00012.bin",
+    "encoder.block.0.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.0.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.0.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.0.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.0.layer.0.layer_norm.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.0.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.0.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.0.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.0.layer.1.layer_norm.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.1.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.1.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.1.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.1.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.1.layer.0.layer_norm.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.1.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.1.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.1.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.1.layer.1.layer_norm.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.10.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.10.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.10.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.10.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.10.layer.0.layer_norm.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.10.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.10.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.10.layer.1.DenseReluDense.wo.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.10.layer.1.layer_norm.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.11.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.11.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.11.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.11.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.11.layer.0.layer_norm.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.11.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.11.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.11.layer.1.DenseReluDense.wo.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.11.layer.1.layer_norm.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.12.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.12.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.12.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.12.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.12.layer.0.layer_norm.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.12.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.12.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.12.layer.1.DenseReluDense.wo.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.12.layer.1.layer_norm.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.13.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.13.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.13.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.13.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.13.layer.0.layer_norm.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.13.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.13.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.13.layer.1.DenseReluDense.wo.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.13.layer.1.layer_norm.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.14.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.14.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.14.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.14.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.14.layer.0.layer_norm.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.14.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.14.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.14.layer.1.DenseReluDense.wo.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.14.layer.1.layer_norm.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.15.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.15.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.15.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.15.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.15.layer.0.layer_norm.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.15.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.15.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.15.layer.1.DenseReluDense.wo.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.15.layer.1.layer_norm.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.16.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.16.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.16.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.16.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.16.layer.0.layer_norm.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.16.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.16.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.16.layer.1.DenseReluDense.wo.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.16.layer.1.layer_norm.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.17.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.17.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.17.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.17.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.17.layer.0.layer_norm.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.17.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.17.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.17.layer.1.DenseReluDense.wo.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.17.layer.1.layer_norm.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.18.layer.0.SelfAttention.k.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.18.layer.0.SelfAttention.o.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.18.layer.0.SelfAttention.q.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.18.layer.0.SelfAttention.v.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.18.layer.0.layer_norm.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.18.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.18.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00004-of-00012.bin",
+    "encoder.block.18.layer.1.DenseReluDense.wo.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.18.layer.1.layer_norm.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.19.layer.0.SelfAttention.k.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.19.layer.0.SelfAttention.o.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.19.layer.0.SelfAttention.q.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.19.layer.0.SelfAttention.v.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.19.layer.0.layer_norm.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.19.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.19.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.19.layer.1.DenseReluDense.wo.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.19.layer.1.layer_norm.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.2.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.2.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.2.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.2.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.2.layer.0.layer_norm.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.2.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.2.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.2.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.2.layer.1.layer_norm.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.20.layer.0.SelfAttention.k.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.20.layer.0.SelfAttention.o.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.20.layer.0.SelfAttention.q.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.20.layer.0.SelfAttention.v.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.20.layer.0.layer_norm.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.20.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.20.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.20.layer.1.DenseReluDense.wo.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.20.layer.1.layer_norm.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.21.layer.0.SelfAttention.k.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.21.layer.0.SelfAttention.o.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.21.layer.0.SelfAttention.q.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.21.layer.0.SelfAttention.v.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.21.layer.0.layer_norm.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.21.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.21.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.21.layer.1.DenseReluDense.wo.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.21.layer.1.layer_norm.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.22.layer.0.SelfAttention.k.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.22.layer.0.SelfAttention.o.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.22.layer.0.SelfAttention.q.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.22.layer.0.SelfAttention.v.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.22.layer.0.layer_norm.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.22.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.22.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.22.layer.1.DenseReluDense.wo.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.22.layer.1.layer_norm.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.23.layer.0.SelfAttention.k.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.23.layer.0.SelfAttention.o.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.23.layer.0.SelfAttention.q.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.23.layer.0.SelfAttention.v.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.23.layer.0.layer_norm.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.23.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.23.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00005-of-00012.bin",
+    "encoder.block.23.layer.1.DenseReluDense.wo.weight": "pytorch_model-00006-of-00012.bin",
+    "encoder.block.23.layer.1.layer_norm.weight": "pytorch_model-00006-of-00012.bin",
+    "encoder.block.3.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.3.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.3.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.3.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.3.layer.0.layer_norm.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.3.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.3.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.block.3.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.3.layer.1.layer_norm.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.4.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.4.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.4.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.4.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.4.layer.0.layer_norm.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.4.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.4.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.4.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.4.layer.1.layer_norm.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.5.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.5.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.5.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.5.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.5.layer.0.layer_norm.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.5.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.5.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.5.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.5.layer.1.layer_norm.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.6.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.6.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.6.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.6.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.6.layer.0.layer_norm.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.6.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.6.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.6.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.6.layer.1.layer_norm.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.7.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.7.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.7.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.7.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.7.layer.0.layer_norm.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.7.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.7.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.7.layer.1.DenseReluDense.wo.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.7.layer.1.layer_norm.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.8.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.8.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.8.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.8.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.8.layer.0.layer_norm.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.8.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.8.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00012.bin",
+    "encoder.block.8.layer.1.DenseReluDense.wo.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.8.layer.1.layer_norm.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.9.layer.0.SelfAttention.k.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.9.layer.0.SelfAttention.o.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.9.layer.0.SelfAttention.q.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.9.layer.0.SelfAttention.v.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.9.layer.0.layer_norm.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.9.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.9.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.9.layer.1.DenseReluDense.wo.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.block.9.layer.1.layer_norm.weight": "pytorch_model-00003-of-00012.bin",
+    "encoder.embed_tokens.weight": "pytorch_model-00001-of-00012.bin",
+    "encoder.final_layer_norm.weight": "pytorch_model-00006-of-00012.bin",
+    "lm_head.weight": "pytorch_model-00012-of-00012.bin",
+    "shared.weight": "pytorch_model-00001-of-00012.bin"
+  }
+}

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ accelerate==0.13.2
2	+ bitsandbytes

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+size 791656

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,113 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "eos_token": "</s>",
+  "extra_ids": 100,
+  "model_max_length": 512,
+  "name_or_path": "google/flan-t5-xxl",
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "special_tokens_map_file": "/home/arthur_huggingface_co/.cache/huggingface/hub/models--google--t5-v1_1-small/snapshots/fb7e6cba609f7bab11c614294bc04f82f613c7b1/special_tokens_map.json",
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}