Upload 6 files

Browse files

Files changed (6) hide show

config.json +60 -0
generation_config.json +7 -0
peft_attempt_1.ipynb +841 -0
special_tokens_map.json +107 -0
tokenizer.json +0 -0
tokenizer_config.json +111 -0

config.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "_name_or_path": "microsoft/GODEL-v1_1-large-seq2seq",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "d_ff": 4096,
+  "d_kv": 64,
+  "d_model": 1024,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 24,
+  "num_heads": 16,
+  "num_layers": 24,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.31.0",
+  "use_cache": true,
+  "vocab_size": 32102
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.31.0"
+}

peft_attempt_1.ipynb ADDED Viewed

	@@ -0,0 +1,841 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install transformers"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "6_gaeY1UMPOv",
+        "outputId": "470ea044-c9b1-400e-f322-aafbdbae4aea"
+      },
+      "execution_count": 9,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.31.0)\n",
+            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.12.2)\n",
+            "Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.16.4)\n",
+            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.22.4)\n",
+            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (23.1)\n",
+            "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.1)\n",
+            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2022.10.31)\n",
+            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.27.1)\n",
+            "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.13.3)\n",
+            "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.3.1)\n",
+            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.65.0)\n",
+            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.6.0)\n",
+            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.7.1)\n",
+            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (1.26.16)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2023.7.22)\n",
+            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.0.12)\n",
+            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!pip install peft"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "UkDCPUBOMh-L",
+        "outputId": "0c618ade-6b5b-4500-8063-a51c29880fb4"
+      },
+      "execution_count": 13,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Requirement already satisfied: peft in /usr/local/lib/python3.10/dist-packages (0.4.0)\n",
+            "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from peft) (1.22.4)\n",
+            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from peft) (23.1)\n",
+            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft) (5.9.5)\n",
+            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from peft) (6.0.1)\n",
+            "Requirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft) (2.0.1+cu118)\n",
+            "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from peft) (4.31.0)\n",
+            "Requirement already satisfied: accelerate in /usr/local/lib/python3.10/dist-packages (from peft) (0.21.0)\n",
+            "Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from peft) (0.3.1)\n",
+            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.12.2)\n",
+            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (4.7.1)\n",
+            "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (1.11.1)\n",
+            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.1)\n",
+            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (3.1.2)\n",
+            "Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft) (2.0.0)\n",
+            "Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.0->peft) (3.25.2)\n",
+            "Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.0->peft) (16.0.6)\n",
+            "Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (0.16.4)\n",
+            "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (2022.10.31)\n",
+            "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (2.27.1)\n",
+            "Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (0.13.3)\n",
+            "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers->peft) (4.65.0)\n",
+            "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from huggingface-hub<1.0,>=0.14.1->transformers->peft) (2023.6.0)\n",
+            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft) (2.1.3)\n",
+            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft) (1.26.16)\n",
+            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft) (2023.7.22)\n",
+            "Requirement already satisfied: charset-normalizer~=2.0.0 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft) (2.0.12)\n",
+            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers->peft) (3.4)\n",
+            "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.0->peft) (1.3.0)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {
+        "id": "6YOhmSaCMK2M"
+      },
+      "outputs": [],
+      "source": [
+        "# from transformers import AutoModelForSeq2SeqLM\n",
+        "# from peft import get_peft_config, get_peft_model, LoraConfig, TaskType\n",
+        "# import torch\n",
+        "# model_name_or_path = \"microsoft/GODEL-v1_1-large-seq2seq\"\n",
+        "# tokenizer_name_or_path = \"microsoft/GODEL-v1_1-large-seq2seq\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from transformers import AutoModelForSeq2SeqLM, AutoTokenizer\n",
+        "\n",
+        "# Hugging Face Hub ID of the pretrained checkpoint to load; change it to use a different model\n",
+        "model_name = 'microsoft/GODEL-v1_1-large-seq2seq'\n",
+        "\n",
+        "# Load the model and tokenizer\n",
+        "model = AutoModelForSeq2SeqLM.from_pretrained(model_name)\n",
+        "tokenizer = AutoTokenizer.from_pretrained(model_name)"
+      ],
+      "metadata": {
+        "id": "r1zRNhfYXN8T"
+      },
+      "execution_count": 2,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Output directory\n",
+        "output_dir = \"medbot_model\"\n",
+        "\n",
+        "# Save the model and tokenizer using the standard Hugging Face naming convention\n",
+        "model.save_pretrained(output_dir)\n",
+        "tokenizer.save_pretrained(output_dir)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "UjV85bPQXw7P",
+        "outputId": "688d07cb-eddd-4a6a-819e-57efd837324b"
+      },
+      "execution_count": 15,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "('medbot_model/tokenizer_config.json',\n",
+              " 'medbot_model/special_tokens_map.json',\n",
+              " 'medbot_model/tokenizer.json')"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 15
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# # peft config\n",
+        "\n",
+        "# peft_config = LoraConfig(\n",
+        "#     task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, r=6, lora_alpha=16, lora_dropout=0.2\n",
+        "# )"
+      ],
+      "metadata": {
+        "id": "qmIGSnctujOh"
+      },
+      "execution_count": 12,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# model = get_peft_model(model, peft_config)\n",
+        "# model.print_trainable_parameters()\n",
+        "\n",
+        "# output_dir = \"medbot_model_peft\"\n",
+        "\n",
+        "# model.save_pretrained(output_dir)\n",
+        "# tokenizer.save_pretrained(output_dir)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "RulB42QiMOhi",
+        "outputId": "e8e2d65d-8afa-4095-bf8b-93749e39b785"
+      },
+      "execution_count": 14,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "trainable params: 1,769,472 || all params: 739,410,944 || trainable%: 0.23930833244469804\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "('medbot_model_peft/tokenizer_config.json',\n",
+              " 'medbot_model_peft/special_tokens_map.json',\n",
+              " 'medbot_model_peft/tokenizer.json')"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 14
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# ============================== Load Dataset =========================="
+      ],
+      "metadata": {
+        "id": "Xj4K4WU-NYp8"
+      },
+      "execution_count": 8,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import pandas as pd\n",
+        "\n",
+        "df = pd.read_csv('/content/drive/MyDrive/Dataset/diseaseDataSetFull2.csv')\n",
+        "df"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 607
+        },
+        "id": "Wc8bxpybNgFo",
+        "outputId": "e5e960a5-e460-4b27-ea2f-bfb86dbbb06b"
+      },
+      "execution_count": 3,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "                                      disease  \\\n",
+              "0                            Fungal infection   \n",
+              "1                            Fungal infection   \n",
+              "2                            Fungal infection   \n",
+              "3                            Fungal infection   \n",
+              "4                            Fungal infection   \n",
+              "...                                       ...   \n",
+              "4915  (vertigo) Paroymsal  Positional Vertigo   \n",
+              "4916                                     Acne   \n",
+              "4917                  Urinary tract infection   \n",
+              "4918                                Psoriasis   \n",
+              "4919                                 Impetigo   \n",
+              "\n",
+              "                                               symptoms  \\\n",
+              "0     itching,skin_rash,nodal_skin_eruptions,dischro...   \n",
+              "1     skin_rash,nodal_skin_eruptions,dischromic__pat...   \n",
+              "2      itching,nodal_skin_eruptions,dischromic__patches   \n",
+              "3                 itching,skin_rash,dischromic__patches   \n",
+              "4                itching,skin_rash,nodal_skin_eruptions   \n",
+              "...                                                 ...   \n",
+              "4915  vomiting,headache,nausea,spinning_movements,lo...   \n",
+              "4916   skin_rash,pus_filled_pimples,blackheads,scurring   \n",
+              "4917  burning_micturition,bladder_discomfort,foul_sm...   \n",
+              "4918  skin_rash,joint_pain,skin_peeling,silver_like_...   \n",
+              "4919  skin_rash,high_fever,blister,red_sore_around_n...   \n",
+              "\n",
+              "                                            precautions  \n",
+              "0     bath twice, use detol or neem in bathing water...  \n",
+              "1     bath twice, use detol or neem in bathing water...  \n",
+              "2     bath twice, use detol or neem in bathing water...  \n",
+              "3     bath twice, use detol or neem in bathing water...  \n",
+              "4     bath twice, use detol or neem in bathing water...  \n",
+              "...                                                 ...  \n",
+              "4915  lie down, avoid sudden change in body, avoid a...  \n",
+              "4916  bath twice, avoid fatty spicy food, drink plen...  \n",
+              "4917  drink plenty of water, increase vitamin c inta...  \n",
+              "4918  wash hands with warm soapy water, stop bleedin...  \n",
+              "4919  soak affected area in warm water, use antibiot...  \n",
+              "\n",
+              "[4920 rows x 3 columns]"
+            ],
+            "text/html": [
+              "\n",
+              "\n",
+              "  <div id=\"df-d8eb48fc-bfd8-4158-80d6-468b1560edca\">\n",
+              "    <div class=\"colab-df-container\">\n",
+              "      <div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>disease</th>\n",
+              "      <th>symptoms</th>\n",
+              "      <th>precautions</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>Fungal infection</td>\n",
+              "      <td>itching,skin_rash,nodal_skin_eruptions,dischro...</td>\n",
+              "      <td>bath twice, use detol or neem in bathing water...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>Fungal infection</td>\n",
+              "      <td>skin_rash,nodal_skin_eruptions,dischromic__pat...</td>\n",
+              "      <td>bath twice, use detol or neem in bathing water...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>Fungal infection</td>\n",
+              "      <td>itching,nodal_skin_eruptions,dischromic__patches</td>\n",
+              "      <td>bath twice, use detol or neem in bathing water...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>Fungal infection</td>\n",
+              "      <td>itching,skin_rash,dischromic__patches</td>\n",
+              "      <td>bath twice, use detol or neem in bathing water...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>Fungal infection</td>\n",
+              "      <td>itching,skin_rash,nodal_skin_eruptions</td>\n",
+              "      <td>bath twice, use detol or neem in bathing water...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>...</th>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "      <td>...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4915</th>\n",
+              "      <td>(vertigo) Paroymsal  Positional Vertigo</td>\n",
+              "      <td>vomiting,headache,nausea,spinning_movements,lo...</td>\n",
+              "      <td>lie down, avoid sudden change in body, avoid a...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4916</th>\n",
+              "      <td>Acne</td>\n",
+              "      <td>skin_rash,pus_filled_pimples,blackheads,scurring</td>\n",
+              "      <td>bath twice, avoid fatty spicy food, drink plen...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4917</th>\n",
+              "      <td>Urinary tract infection</td>\n",
+              "      <td>burning_micturition,bladder_discomfort,foul_sm...</td>\n",
+              "      <td>drink plenty of water, increase vitamin c inta...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4918</th>\n",
+              "      <td>Psoriasis</td>\n",
+              "      <td>skin_rash,joint_pain,skin_peeling,silver_like_...</td>\n",
+              "      <td>wash hands with warm soapy water, stop bleedin...</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4919</th>\n",
+              "      <td>Impetigo</td>\n",
+              "      <td>skin_rash,high_fever,blister,red_sore_around_n...</td>\n",
+              "      <td>soak affected area in warm water, use antibiot...</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "<p>4920 rows × 3 columns</p>\n",
+              "</div>\n",
+              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d8eb48fc-bfd8-4158-80d6-468b1560edca')\"\n",
+              "              title=\"Convert this dataframe to an interactive table.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "       width=\"24px\">\n",
+              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
+              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
+              "  </svg>\n",
+              "      </button>\n",
+              "\n",
+              "\n",
+              "\n",
+              "    <div id=\"df-effcd91d-4d34-4d62-9ee1-b5487b3c0b00\">\n",
+              "      <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-effcd91d-4d34-4d62-9ee1-b5487b3c0b00')\"\n",
+              "              title=\"Suggest charts.\"\n",
+              "              style=\"display:none;\">\n",
+              "\n",
+              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
+              "     width=\"24px\">\n",
+              "    <g>\n",
+              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
+              "    </g>\n",
+              "</svg>\n",
+              "      </button>\n",
+              "    </div>\n",
+              "\n",
+              "<style>\n",
+              "  .colab-df-quickchart {\n",
+              "    background-color: #E8F0FE;\n",
+              "    border: none;\n",
+              "    border-radius: 50%;\n",
+              "    cursor: pointer;\n",
+              "    display: none;\n",
+              "    fill: #1967D2;\n",
+              "    height: 32px;\n",
+              "    padding: 0 0 0 0;\n",
+              "    width: 32px;\n",
+              "  }\n",
+              "\n",
+              "  .colab-df-quickchart:hover {\n",
+              "    background-color: #E2EBFA;\n",
+              "    box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "    fill: #174EA6;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart {\n",
+              "    background-color: #3B4455;\n",
+              "    fill: #D2E3FC;\n",
+              "  }\n",
+              "\n",
+              "  [theme=dark] .colab-df-quickchart:hover {\n",
+              "    background-color: #434B5C;\n",
+              "    box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "    filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "    fill: #FFFFFF;\n",
+              "  }\n",
+              "</style>\n",
+              "\n",
+              "    <script>\n",
+              "      async function quickchart(key) {\n",
+              "        const containerElement = document.querySelector('#' + key);\n",
+              "        const charts = await google.colab.kernel.invokeFunction(\n",
+              "            'suggestCharts', [key], {});\n",
+              "      }\n",
+              "    </script>\n",
+              "\n",
+              "      <script>\n",
+              "\n",
+              "function displayQuickchartButton(domScope) {\n",
+              "  let quickchartButtonEl =\n",
+              "    domScope.querySelector('#df-effcd91d-4d34-4d62-9ee1-b5487b3c0b00 button.colab-df-quickchart');\n",
+              "  quickchartButtonEl.style.display =\n",
+              "    google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "}\n",
+              "\n",
+              "        displayQuickchartButton(document);\n",
+              "      </script>\n",
+              "      <style>\n",
+              "    .colab-df-container {\n",
+              "      display:flex;\n",
+              "      flex-wrap:wrap;\n",
+              "      gap: 12px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert {\n",
+              "      background-color: #E8F0FE;\n",
+              "      border: none;\n",
+              "      border-radius: 50%;\n",
+              "      cursor: pointer;\n",
+              "      display: none;\n",
+              "      fill: #1967D2;\n",
+              "      height: 32px;\n",
+              "      padding: 0 0 0 0;\n",
+              "      width: 32px;\n",
+              "    }\n",
+              "\n",
+              "    .colab-df-convert:hover {\n",
+              "      background-color: #E2EBFA;\n",
+              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
+              "      fill: #174EA6;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert {\n",
+              "      background-color: #3B4455;\n",
+              "      fill: #D2E3FC;\n",
+              "    }\n",
+              "\n",
+              "    [theme=dark] .colab-df-convert:hover {\n",
+              "      background-color: #434B5C;\n",
+              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
+              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
+              "      fill: #FFFFFF;\n",
+              "    }\n",
+              "  </style>\n",
+              "\n",
+              "      <script>\n",
+              "        const buttonEl =\n",
+              "          document.querySelector('#df-d8eb48fc-bfd8-4158-80d6-468b1560edca button.colab-df-convert');\n",
+              "        buttonEl.style.display =\n",
+              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
+              "\n",
+              "        async function convertToInteractive(key) {\n",
+              "          const element = document.querySelector('#df-d8eb48fc-bfd8-4158-80d6-468b1560edca');\n",
+              "          const dataTable =\n",
+              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
+              "                                                     [key], {});\n",
+              "          if (!dataTable) return;\n",
+              "\n",
+              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
+              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
+              "            + ' to learn more about interactive tables.';\n",
+              "          element.innerHTML = '';\n",
+              "          dataTable['output_type'] = 'display_data';\n",
+              "          await google.colab.output.renderOutput(dataTable, element);\n",
+              "          const docLink = document.createElement('div');\n",
+              "          docLink.innerHTML = docLinkHtml;\n",
+              "          element.appendChild(docLink);\n",
+              "        }\n",
+              "      </script>\n",
+              "    </div>\n",
+              "  </div>\n"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 3
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def dataframe_to_dataset(df):\n",
+        "    \"\"\"\n",
+        "    Convert a DataFrame with columns 'disease', 'symptoms', and 'precautions'\n",
+        "    into a list of (disease, symptoms, precautions) tuples, one per row.\n",
+        "\n",
+        "    Parameters:\n",
+        "        df (pd.DataFrame): Input DataFrame with columns 'disease', 'symptoms', and 'precautions'.\n",
+        "\n",
+        "    Returns:\n",
+        "        list: One (disease, symptoms, precautions) tuple per row of df, in row order.\n",
+        "\n",
+        "    Raises:\n",
+        "        ValueError: If any of the three required columns is missing from df.\n",
+        "    \"\"\"\n",
+        "    if not all(col in df.columns for col in ['disease', 'symptoms', 'precautions']):\n",
+        "        raise ValueError(\"DataFrame must contain 'disease', 'symptoms', and 'precautions' columns.\")\n",
+        "\n",
+        "    dataset = []\n",
+        "    for _, row in df.iterrows():\n",
+        "        disease = row['disease']\n",
+        "        symptoms = row['symptoms']\n",
+        "        precautions = row['precautions']\n",
+        "        dataset.append((disease, symptoms, precautions))\n",
+        "\n",
+        "    return dataset\n",
+        "\n",
+        "data = dataframe_to_dataset(df)\n",
+        "data[:10]"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "rGbPHffgNi72",
+        "outputId": "48542ee0-d5ab-4cae-d984-1dae31c77bd4"
+      },
+      "execution_count": 4,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[('Fungal infection',\n",
+              "  'itching,skin_rash,nodal_skin_eruptions,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'skin_rash,nodal_skin_eruptions,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,nodal_skin_eruptions,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,skin_rash,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,skin_rash,nodal_skin_eruptions',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'skin_rash,nodal_skin_eruptions,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,nodal_skin_eruptions,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,skin_rash,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,skin_rash,nodal_skin_eruptions',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,skin_rash,nodal_skin_eruptions,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths')]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 4
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "data[:10]"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "VOG5lBemvYei",
+        "outputId": "9b8013a4-8273-4a51-9f1e-59566c9d4892"
+      },
+      "execution_count": 5,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[('Fungal infection',\n",
+              "  'itching,skin_rash,nodal_skin_eruptions,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'skin_rash,nodal_skin_eruptions,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,nodal_skin_eruptions,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,skin_rash,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,skin_rash,nodal_skin_eruptions',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'skin_rash,nodal_skin_eruptions,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,nodal_skin_eruptions,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,skin_rash,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,skin_rash,nodal_skin_eruptions',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths'),\n",
+              " ('Fungal infection',\n",
+              "  'itching,skin_rash,nodal_skin_eruptions,dischromic__patches',\n",
+              "  'bath twice, use detol or neem in bathing water, keep infected area dry, use clean cloths')]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 5
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# =============================== Training  ====================================="
+      ],
+      "metadata": {
+        "id": "2AoLTRuhNSyp"
+      },
+      "execution_count": 7,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AdamW\n",
+        "from torch.utils.data import Dataset, DataLoader\n",
+        "import torch\n",
+        "\n",
+        "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
+        "model = model.to(device)\n",
+        "\n",
+        "# Sample Data\n",
+        "sample_data = data\n",
+        "\n",
+        "class CustomDataset(Dataset):\n",
+        "    \"\"\"Seq2seq dataset built from (disease, symptoms, precautions) tuples.\n",
+        "\n",
+        "    Each item is rendered into a source prompt and a target reply, tokenized,\n",
+        "    and padded/truncated to exactly ``max_length`` tokens so that the default\n",
+        "    DataLoader collation can stack items into a batch.\n",
+        "\n",
+        "    Parameters:\n",
+        "        data (list): Tuples of (disease, symptoms, precautions) strings.\n",
+        "        tokenizer: Callable tokenizer returning ``input_ids`` and ``attention_mask``.\n",
+        "        max_length (int): Fixed token length of every source and target sequence.\n",
+        "    \"\"\"\n",
+        "\n",
+        "    def __init__(self, data, tokenizer, max_length):\n",
+        "        self.data = data\n",
+        "        self.tokenizer = tokenizer\n",
+        "        self.max_length = max_length\n",
+        "\n",
+        "    def __len__(self):\n",
+        "        return len(self.data)\n",
+        "\n",
+        "    def _encode(self, text):\n",
+        "        \"\"\"Tokenize ``text`` to a fixed-length (1, max_length) encoding.\"\"\"\n",
+        "        # truncation=True is required for a fixed length: without it an\n",
+        "        # over-long text yields a longer tensor and batch collation fails.\n",
+        "        return self.tokenizer(\n",
+        "            text,\n",
+        "            padding=\"max_length\",\n",
+        "            truncation=True,\n",
+        "            max_length=self.max_length,\n",
+        "            return_tensors=\"pt\",\n",
+        "        )\n",
+        "\n",
+        "    def __getitem__(self, index):\n",
+        "        \"\"\"Return a dict with ``input_ids``, ``attention_mask`` and ``labels`` tensors.\"\"\"\n",
+        "        disease, symptoms, precautions = self.data[index]\n",
+        "        source_text = f\"I am feeling {symptoms}\"\n",
+        "        target_text = f\"You might have {disease}, the precautions are {precautions}\"\n",
+        "\n",
+        "        # Tokenize the source and target texts separately\n",
+        "        source_tokens = self._encode(source_text)\n",
+        "        target_tokens = self._encode(target_text)\n",
+        "\n",
+        "        # squeeze(0) removes only the leading batch dimension; a bare squeeze()\n",
+        "        # would also collapse the sequence axis when max_length == 1.\n",
+        "        input_ids = source_tokens.input_ids.squeeze(0)\n",
+        "        attention_mask = source_tokens.attention_mask.squeeze(0)\n",
+        "        labels = target_tokens.input_ids.squeeze(0)\n",
+        "\n",
+        "        # Padding positions in the labels are set to -100 so the model's\n",
+        "        # cross-entropy loss ignores them instead of learning to emit padding.\n",
+        "        pad_token_id = self.tokenizer.pad_token_id\n",
+        "        if pad_token_id is not None:\n",
+        "            labels = labels.masked_fill(labels == pad_token_id, -100)\n",
+        "\n",
+        "        return {\n",
+        "            \"input_ids\": input_ids,\n",
+        "            \"attention_mask\": attention_mask,\n",
+        "            \"labels\": labels,\n",
+        "        }\n",
+        "\n",
+        "def fine_tune_and_save_model(model, tokenizer, data=None, output_dir=\"medbot_model_epoch3_s512\"):\n",
+        "    \"\"\"Fine-tune ``model`` on symptom/precaution pairs, then save model and tokenizer.\n",
+        "\n",
+        "    Parameters:\n",
+        "        model: Seq2seq model to train; batches are moved to the module-level\n",
+        "            ``device``, so the model is expected to live there already.\n",
+        "        tokenizer: Tokenizer matching ``model`` (supplied by the caller).\n",
+        "        data (list, optional): (disease, symptoms, precautions) tuples.\n",
+        "            Defaults to the module-level ``sample_data``.\n",
+        "        output_dir (str): Directory that receives the saved model and tokenizer.\n",
+        "\n",
+        "    Raises:\n",
+        "        ValueError: If there are no training examples.\n",
+        "    \"\"\"\n",
+        "    if data is None:\n",
+        "        data = sample_data\n",
+        "    # Fail early with a clear message instead of dividing by zero when\n",
+        "    # averaging the loss over an empty data loader.\n",
+        "    if len(data) == 0:\n",
+        "        raise ValueError(\"Training data is empty; nothing to fine-tune on.\")\n",
+        "\n",
+        "    max_length = 128  # You can adjust this based on your input sequence length requirements\n",
+        "    dataset = CustomDataset(data, tokenizer, max_length)\n",
+        "\n",
+        "    # Data loader\n",
+        "    batch_size = 2\n",
+        "    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)\n",
+        "\n",
+        "    # Hyperparameters\n",
+        "    learning_rate = 2e-5\n",
+        "    num_epochs = 2\n",
+        "\n",
+        "    # torch.optim.AdamW replaces the deprecated transformers.AdamW. eps and\n",
+        "    # weight_decay are set explicitly to the deprecated implementation's\n",
+        "    # defaults so the optimisation stays as close as possible to before.\n",
+        "    optimizer = torch.optim.AdamW(\n",
+        "        model.parameters(), lr=learning_rate, eps=1e-6, weight_decay=0.0\n",
+        "    )\n",
+        "\n",
+        "    # Training loop\n",
+        "    model.train()\n",
+        "    for epoch in range(num_epochs):\n",
+        "        total_loss = 0.0\n",
+        "        for batch in dataloader:\n",
+        "            optimizer.zero_grad()\n",
+        "\n",
+        "            input_ids = batch[\"input_ids\"].to(device)\n",
+        "            attention_mask = batch[\"attention_mask\"].to(device)\n",
+        "            labels = batch[\"labels\"].to(device)\n",
+        "\n",
+        "            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)\n",
+        "            loss = outputs.loss\n",
+        "            total_loss += loss.item()\n",
+        "\n",
+        "            loss.backward()\n",
+        "            optimizer.step()\n",
+        "\n",
+        "        average_loss = total_loss / len(dataloader)\n",
+        "        print(f\"Epoch {epoch+1}/{num_epochs} - Average Loss: {average_loss:.4f}\")\n",
+        "\n",
+        "    # Save the fine-tuned model and tokenizer\n",
+        "    model.save_pretrained(output_dir)\n",
+        "    tokenizer.save_pretrained(output_dir)"
+      ],
+      "metadata": {
+        "id": "4COYhQqYM0ni"
+      },
+      "execution_count": 6,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "fine_tune_and_save_model(model, tokenizer)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "j3p4lBPbOZZP",
+        "outputId": "91397825-200d-4e4a-f4e7-29e9df6c040c"
+      },
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.10/dist-packages/transformers/optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
+            "  warnings.warn(\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Epoch 1/2 - Average Loss: 0.1588\n",
+            "Epoch 2/2 - Average Loss: 0.0038\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "M9dy5RBfRcCH"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "eos_token": "</s>",
+  "pad_token": "<PAD>",
+  "unk_token": "<unk>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,111 @@

+{
+  "additional_special_tokens": [
+    "<extra_id_0>",
+    "<extra_id_1>",
+    "<extra_id_2>",
+    "<extra_id_3>",
+    "<extra_id_4>",
+    "<extra_id_5>",
+    "<extra_id_6>",
+    "<extra_id_7>",
+    "<extra_id_8>",
+    "<extra_id_9>",
+    "<extra_id_10>",
+    "<extra_id_11>",
+    "<extra_id_12>",
+    "<extra_id_13>",
+    "<extra_id_14>",
+    "<extra_id_15>",
+    "<extra_id_16>",
+    "<extra_id_17>",
+    "<extra_id_18>",
+    "<extra_id_19>",
+    "<extra_id_20>",
+    "<extra_id_21>",
+    "<extra_id_22>",
+    "<extra_id_23>",
+    "<extra_id_24>",
+    "<extra_id_25>",
+    "<extra_id_26>",
+    "<extra_id_27>",
+    "<extra_id_28>",
+    "<extra_id_29>",
+    "<extra_id_30>",
+    "<extra_id_31>",
+    "<extra_id_32>",
+    "<extra_id_33>",
+    "<extra_id_34>",
+    "<extra_id_35>",
+    "<extra_id_36>",
+    "<extra_id_37>",
+    "<extra_id_38>",
+    "<extra_id_39>",
+    "<extra_id_40>",
+    "<extra_id_41>",
+    "<extra_id_42>",
+    "<extra_id_43>",
+    "<extra_id_44>",
+    "<extra_id_45>",
+    "<extra_id_46>",
+    "<extra_id_47>",
+    "<extra_id_48>",
+    "<extra_id_49>",
+    "<extra_id_50>",
+    "<extra_id_51>",
+    "<extra_id_52>",
+    "<extra_id_53>",
+    "<extra_id_54>",
+    "<extra_id_55>",
+    "<extra_id_56>",
+    "<extra_id_57>",
+    "<extra_id_58>",
+    "<extra_id_59>",
+    "<extra_id_60>",
+    "<extra_id_61>",
+    "<extra_id_62>",
+    "<extra_id_63>",
+    "<extra_id_64>",
+    "<extra_id_65>",
+    "<extra_id_66>",
+    "<extra_id_67>",
+    "<extra_id_68>",
+    "<extra_id_69>",
+    "<extra_id_70>",
+    "<extra_id_71>",
+    "<extra_id_72>",
+    "<extra_id_73>",
+    "<extra_id_74>",
+    "<extra_id_75>",
+    "<extra_id_76>",
+    "<extra_id_77>",
+    "<extra_id_78>",
+    "<extra_id_79>",
+    "<extra_id_80>",
+    "<extra_id_81>",
+    "<extra_id_82>",
+    "<extra_id_83>",
+    "<extra_id_84>",
+    "<extra_id_85>",
+    "<extra_id_86>",
+    "<extra_id_87>",
+    "<extra_id_88>",
+    "<extra_id_89>",
+    "<extra_id_90>",
+    "<extra_id_91>",
+    "<extra_id_92>",
+    "<extra_id_93>",
+    "<extra_id_94>",
+    "<extra_id_95>",
+    "<extra_id_96>",
+    "<extra_id_97>",
+    "<extra_id_98>",
+    "<extra_id_99>"
+  ],
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "extra_ids": 100,
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}